-rw-r--r--  CMakeLists.txt | 29
-rw-r--r--  Makefile | 2
-rw-r--r--  Makefile.rules | 80
-rw-r--r--  autoconf/configure.ac | 36
-rwxr-xr-x  cmake/config-ix.cmake | 44
-rwxr-xr-x  configure | 201
-rw-r--r--  docs/CodeGenerator.html | 4
-rw-r--r--  docs/CommandLine.html | 19
-rw-r--r--  docs/CompilerDriver.html | 121
-rw-r--r--  docs/CompilerDriverTutorial.html | 21
-rw-r--r--  docs/DeveloperPolicy.html | 4
-rw-r--r--  docs/LangRef.html | 24
-rw-r--r--  docs/MakefileGuide.html | 20
-rw-r--r--  docs/ProgrammersManual.html | 138
-rw-r--r--  docs/WritingAnLLVMPass.html | 4
-rw-r--r--  examples/BrainF/BrainFDriver.cpp | 3
-rw-r--r--  examples/HowToUseJIT/HowToUseJIT.cpp | 4
-rw-r--r--  examples/Kaleidoscope/toy.cpp | 3
-rw-r--r--  examples/ParallelJIT/ParallelJIT.cpp | 6
-rw-r--r--  include/llvm/ADT/PostOrderIterator.h | 7
-rw-r--r--  include/llvm/ADT/Triple.h | 1
-rw-r--r--  include/llvm/Analysis/IVUsers.h | 23
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h | 110
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpressions.h | 67
-rw-r--r--  include/llvm/CallingConv.h | 13
-rw-r--r--  include/llvm/CodeGen/FastISel.h | 1
-rw-r--r--  include/llvm/CodeGen/LiveInterval.h | 148
-rw-r--r--  include/llvm/CodeGen/MachineFunction.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineInstr.h | 4
-rw-r--r--  include/llvm/CodeGen/MachineOperand.h | 18
-rw-r--r--  include/llvm/CodeGen/MachineRegisterInfo.h | 29
-rw-r--r--  include/llvm/CodeGen/RuntimeLibcalls.h | 4
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h | 2
-rw-r--r--  include/llvm/Config/AsmPrinters.def.in | 29
-rw-r--r--  include/llvm/Config/Targets.def.in | 28
-rw-r--r--  include/llvm/Config/config.h.cmake | 3
-rw-r--r--  include/llvm/Config/config.h.in | 6
-rw-r--r--  include/llvm/Constants.h | 34
-rw-r--r--  include/llvm/DerivedTypes.h | 4
-rw-r--r--  include/llvm/MC/MCInst.h | 125
-rw-r--r--  include/llvm/Support/CommandLine.h | 4
-rw-r--r--  include/llvm/Support/DebugLoc.h (renamed from include/llvm/CodeGen/DebugLoc.h) | 12
-rw-r--r--  include/llvm/Support/IRBuilder.h | 8
-rw-r--r--  include/llvm/Support/ManagedStatic.h | 16
-rw-r--r--  include/llvm/Support/SourceMgr.h (renamed from utils/TableGen/TGSourceMgr.h) | 68
-rw-r--r--  include/llvm/System/Atomic.h | 2
-rw-r--r--  include/llvm/System/Mutex.h | 80
-rw-r--r--  include/llvm/System/Path.h | 5
-rw-r--r--  include/llvm/System/RWMutex.h | 175
-rw-r--r--  include/llvm/System/Threading.h | 45
-rw-r--r--  include/llvm/Target/DarwinTargetAsmInfo.h | 2
-rw-r--r--  include/llvm/Target/Target.td | 1
-rw-r--r--  include/llvm/Target/TargetAsmInfo.h | 26
-rw-r--r--  include/llvm/Target/TargetLowering.h | 2
-rw-r--r--  include/llvm/Target/TargetMachine.h | 8
-rw-r--r--  include/llvm/Target/TargetRegisterInfo.h | 30
-rw-r--r--  include/llvm/Target/TargetSelect.h | 65
-rw-r--r--  include/llvm/Transforms/Scalar.h | 8
-rw-r--r--  include/llvm/Transforms/Utils/Local.h | 10
-rw-r--r--  include/llvm/Type.h | 36
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 4
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 2
-rw-r--r--  lib/Analysis/IVUsers.cpp | 47
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 811
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 20
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 35
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 4
-rw-r--r--  lib/AsmParser/LLParser.cpp | 16
-rw-r--r--  lib/AsmParser/LLToken.h | 4
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 11
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 29
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 22
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 13
-rw-r--r--  lib/CodeGen/LazyLiveness.cpp | 19
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 60
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 65
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 13
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 2
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 42
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 57
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 47
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 18
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 160
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.h | 8
-rw-r--r--  lib/CodeGen/Spiller.cpp | 156
-rw-r--r--  lib/CodeGen/Spiller.h | 11
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 11
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 16
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 8
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 2
-rw-r--r--  lib/Support/CMakeLists.txt | 1
-rw-r--r--  lib/Support/ManagedStatic.cpp | 24
-rw-r--r--  lib/Support/SourceMgr.cpp (renamed from utils/TableGen/TGSourceMgr.cpp) | 52
-rw-r--r--  lib/Support/Triple.cpp | 5
-rw-r--r--  lib/System/Atomic.cpp | 28
-rw-r--r--  lib/System/CMakeLists.txt | 2
-rw-r--r--  lib/System/Mutex.cpp | 20
-rw-r--r--  lib/System/RWMutex.cpp | 175
-rw-r--r--  lib/System/Threading.cpp | 63
-rw-r--r--  lib/System/Unix/Mutex.inc | 10
-rw-r--r--  lib/System/Unix/Path.inc | 8
-rw-r--r--  lib/System/Unix/RWMutex.inc | 43
-rw-r--r--  lib/System/Unix/Unix.h | 9
-rw-r--r--  lib/System/Win32/Mutex.inc | 10
-rw-r--r--  lib/System/Win32/Path.inc | 18
-rw-r--r--  lib/System/Win32/RWMutex.inc | 58
-rw-r--r--  lib/Target/ARM/ARM.td | 94
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 27
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 40
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 62
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 1
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 32
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 5
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 196
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 300
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp | 344
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 37
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 4
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 35
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 22
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 16
-rw-r--r--  lib/Target/ARM/ARMTargetAsmInfo.cpp | 91
-rw-r--r--  lib/Target/ARM/ARMTargetAsmInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 13
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 18
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 44
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 34
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.h | 20
-rw-r--r--  lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 16
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 23
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 19
-rw-r--r--  lib/Target/CellSPU/SPUTargetAsmInfo.cpp | 3
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 21
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 18
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 5
-rw-r--r--  lib/Target/DarwinTargetAsmInfo.cpp | 47
-rw-r--r--  lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp | 15
-rw-r--r--  lib/Target/IA64/IA64ISelLowering.cpp | 4
-rw-r--r--  lib/Target/IA64/IA64TargetMachine.cpp | 23
-rw-r--r--  lib/Target/IA64/IA64TargetMachine.h | 18
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 5
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 15
-rw-r--r--  lib/Target/PIC16/PIC16AsmPrinter.cpp | 39
-rw-r--r--  lib/Target/PIC16/PIC16AsmPrinter.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 446
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.h | 53
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 66
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.cpp | 5
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 28
-rw-r--r--  lib/Target/PowerPC/PPCTargetAsmInfo.cpp | 46
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 5
-rw-r--r--  lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 18
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 21
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 12
-rw-r--r--  lib/Target/TargetAsmInfo.cpp | 13
-rw-r--r--  lib/Target/X86/AsmPrinter/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp | 320
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h | 58
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 143
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 14
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp | 57
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h | 3
-rw-r--r--  lib/Target/X86/README.txt | 45
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 9
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 28
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 15
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 33
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 113
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 6
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 27
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 2
-rw-r--r--  lib/Target/X86/X86TargetAsmInfo.cpp | 62
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 6
-rw-r--r--  lib/Target/XCore/XCoreTargetAsmInfo.cpp | 3
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 5
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 5
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 5
-rw-r--r--  lib/Transforms/IPO/RaiseAllocations.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/RSProfiling.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 29
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp | 394
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 94
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 151
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 93
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 16
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 45
-rw-r--r--  lib/Transforms/Utils/LowerAllocations.cpp | 2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 29
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 13
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 48
-rw-r--r--  lib/VMCore/Constants.cpp | 330
-rw-r--r--  lib/VMCore/Function.cpp | 8
-rw-r--r--  lib/VMCore/Instructions.cpp | 87
-rw-r--r--  lib/VMCore/LeakDetector.cpp | 60
-rw-r--r--  lib/VMCore/Mangler.cpp | 9
-rw-r--r--  lib/VMCore/Pass.cpp | 22
-rw-r--r--  lib/VMCore/PassManager.cpp | 8
-rw-r--r--  lib/VMCore/Type.cpp | 268
-rw-r--r--  lib/VMCore/TypeSymbolTable.cpp | 29
-rw-r--r--  lib/VMCore/Value.cpp | 16
-rw-r--r--  lib/VMCore/Verifier.cpp | 20
-rw-r--r--  test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-02-15-UMax.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll | 7
-rw-r--r--  test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/avoid-smax-0.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/do-loop.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/smax.ll | 4
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count2.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count3.ll | 6
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count5.ll | 48
-rw-r--r--  test/Analysis/ScalarEvolution/xor-and.ll | 12
-rw-r--r--  test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll | 344
-rw-r--r--  test/CodeGen/ARM/2009-06-18-ThumbCommuteMul.ll | 8
-rw-r--r--  test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll | 30
-rw-r--r--  test/CodeGen/ARM/ifcvt9.ll | 12
-rw-r--r--  test/CodeGen/ARM/ldrd.ll | 14
-rw-r--r--  test/CodeGen/ARM/stm.ll | 2
-rw-r--r--  test/CodeGen/ARM/thumb2-add.ll | 50
-rw-r--r--  test/CodeGen/ARM/thumb2-mov.ll | 127
-rw-r--r--  test/CodeGen/ARM/thumb2-mov2.ll | 65
-rw-r--r--  test/CodeGen/ARM/thumb2-shifter.ll | 40
-rw-r--r--  test/CodeGen/ARM/vargs2.ll | 4
-rw-r--r--  test/CodeGen/CellSPU/mul-with-overflow.ll | 15
-rw-r--r--  test/CodeGen/PowerPC/mul-with-overflow.ll | 15
-rw-r--r--  test/CodeGen/X86/2009-06-15-not-a-tail-call.ll | 14
-rw-r--r--  test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll | 9
-rw-r--r--  test/CodeGen/X86/fmul-zero.ll | 2
-rw-r--r--  test/CodeGen/X86/inline-asm-fpstack2.ll | 10
-rw-r--r--  test/CodeGen/X86/iv-users-in-other-loops.ll | 10
-rw-r--r--  test/CodeGen/X86/optimize-max-0.ll (renamed from test/CodeGen/X86/optimize-smax.ll) | 229
-rw-r--r--  test/CodeGen/X86/optimize-max-1.ll | 78
-rw-r--r--  test/CodeGen/X86/optimize-max-2.ll | 30
-rw-r--r--  test/CodeGen/X86/pic_jumptable.ll | 2
-rw-r--r--  test/CodeGen/X86/tls1-pic.ll | 2
-rw-r--r--  test/CodeGen/X86/tls1.ll | 2
-rw-r--r--  test/CodeGen/X86/tls2-pic.ll | 4
-rw-r--r--  test/CodeGen/X86/tls3-pic.ll | 2
-rw-r--r--  test/CodeGen/X86/tls4-pic.ll | 2
-rw-r--r--  test/CodeGen/X86/umul-with-overflow.ll | 8
-rw-r--r--  test/DebugInfo/2009-06-15-InlineFuncStart.ll | 77
-rw-r--r--  test/DebugInfo/2009-06-15-abstract_origin.ll | 275
-rw-r--r--  test/Feature/vector-cast-constant-exprs.ll | 37
-rw-r--r--  test/FrontendC++/2009-06-16-DebugInfoCrash.cpp | 10
-rw-r--r--  test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp | 32
-rw-r--r--  test/FrontendC/2009-06-14-HighlyAligned.c | 8
-rw-r--r--  test/FrontendC/2009-06-18-StaticInitTailPadPack.c | 26
-rw-r--r--  test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll | 11
-rw-r--r--  test/Transforms/ConstProp/div-zero.ll | 12
-rw-r--r--  test/Transforms/GVN/2009-06-17-InvalidPRE.ll | 72
-rw-r--r--  test/Transforms/GVN/pre-single-pred.ll | 9
-rw-r--r--  test/Transforms/IndVarSimplify/ashr-tripcount.ll | 107
-rw-r--r--  test/Transforms/IndVarSimplify/loop_evaluate_6.ll | 6
-rw-r--r--  test/Transforms/IndVarSimplify/masked-iv.ll | 4
-rw-r--r--  test/Transforms/IndVarSimplify/pointer.ll | 38
-rw-r--r--  test/Transforms/IndVarSimplify/shrunk-constant.ll | 15
-rw-r--r--  test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll | 2
-rw-r--r--  test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll | 9
-rw-r--r--  test/Transforms/InstCombine/select-load-call.ll | 15
-rw-r--r--  test/Transforms/InstCombine/trunc-mask-ext.ll | 38
-rw-r--r--  test/Transforms/InstCombine/vector-casts-0.ll | 55
-rw-r--r--  test/Transforms/InstCombine/vector-casts-1.ll | 15
-rw-r--r--  test/Transforms/JumpThreading/branch-no-const.ll | 21
-rw-r--r--  test/Transforms/JumpThreading/dup-cond.ll | 30
-rw-r--r--  test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll | 18
-rw-r--r--  test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll | 557
-rw-r--r--  test/Transforms/SimplifyLibCalls/2008-12-20-StrcmpMemcmp.ll | 10
-rw-r--r--  test/Transforms/TailCallElim/dont_reorder_load.ll | 64
-rw-r--r--  test/Transforms/TailCallElim/reorder_load.ll | 101
-rw-r--r--  tools/CMakeLists.txt | 1
-rw-r--r--  tools/Makefile | 3
-rw-r--r--  tools/gold/gold-plugin.cpp | 17
-rw-r--r--  tools/llc/llc.cpp | 4
-rw-r--r--  tools/lli/lli.cpp | 5
-rw-r--r--  tools/llvm-mc/AsmLexer.cpp | 258
-rw-r--r--  tools/llvm-mc/AsmLexer.h | 109
-rw-r--r--  tools/llvm-mc/AsmParser.cpp | 351
-rw-r--r--  tools/llvm-mc/AsmParser.h | 48
-rw-r--r--  tools/llvm-mc/CMakeLists.txt | 7
-rw-r--r--  tools/llvm-mc/Makefile | 17
-rw-r--r--  tools/llvm-mc/llvm-mc.cpp | 161
-rw-r--r--  tools/llvmc/doc/LLVMC-Reference.rst | 76
-rw-r--r--  tools/llvmc/doc/LLVMC-Tutorial.rst | 21
-rw-r--r--  tools/llvmc/driver/Makefile | 4
-rwxr-xr-x  utils/NewNightlyTest.pl | 58
-rw-r--r--  utils/TableGen/AsmWriterEmitter.cpp | 6
-rw-r--r--  utils/TableGen/CMakeLists.txt | 1
-rw-r--r--  utils/TableGen/CodeGenDAGPatterns.cpp | 6
-rw-r--r--  utils/TableGen/Record.h | 18
-rw-r--r--  utils/TableGen/TGLexer.cpp | 44
-rw-r--r--  utils/TableGen/TGLexer.h | 20
-rw-r--r--  utils/TableGen/TGParser.cpp | 40
-rw-r--r--  utils/TableGen/TGParser.h | 18
-rw-r--r--  utils/TableGen/TableGen.cpp | 16
337 files changed, 11233 insertions, 2598 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2e2cf358e413..543ee7f88079 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,14 +81,23 @@ if( LLVM_TARGETS_TO_BUILD STREQUAL "all" )
set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} )
endif()
+set(LLVM_ENUM_TARGETS "")
foreach(c ${LLVM_TARGETS_TO_BUILD})
list(FIND LLVM_ALL_TARGETS ${c} idx)
if( idx LESS 0 )
message(FATAL_ERROR "The target `${c}' does not exists.
It should be one of\n${LLVM_ALL_TARGETS}")
+ else()
+ set(LLVM_ENUM_TARGETS "${LLVM_ENUM_TARGETS}LLVM_TARGET(${c})\n")
endif()
endforeach(c)
+# Produce llvm/Config/Targets.def
+configure_file(
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Targets.def.in
+ ${LLVM_BINARY_DIR}/include/llvm/Config/Targets.def
+ )
+
set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm)
# The USE_EXPLICIT_DEPENDENCIES variable will be TRUE to indicate that
@@ -250,14 +259,23 @@ add_subdirectory(lib/Linker)
add_subdirectory(lib/Analysis)
add_subdirectory(lib/Analysis/IPA)
-foreach(t ${LLVM_TARGETS_TO_BUILD})
+ set(LLVM_ENUM_ASM_PRINTERS "")
+ foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(lib/Target/${t})
if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt )
- add_subdirectory(lib/Target/${t}/AsmPrinter)
- endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt )
+ add_subdirectory(lib/Target/${t}/AsmPrinter)
+ set(LLVM_ENUM_ASM_PRINTERS
+ "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n")
+ endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt )
endforeach(t)
+# Produce llvm/Config/AsmPrinters.def
+configure_file(
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in
+ ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def
+ )
+
add_subdirectory(lib/ExecutionEngine)
add_subdirectory(lib/ExecutionEngine/Interpreter)
add_subdirectory(lib/ExecutionEngine/JIT)
@@ -269,7 +287,10 @@ add_subdirectory(lib/Archive)
add_subdirectory(projects)
add_subdirectory(tools)
-add_subdirectory(examples)
+option(LLVM_EXAMPLES "Build LLVM example programs." OFF)
+if (LLVM_EXAMPLES)
+ add_subdirectory(examples)
+endif ()
install(DIRECTORY include
DESTINATION .
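
The configure_file() calls above substitute the LLVM_ENUM_TARGETS and LLVM_ENUM_ASM_PRINTERS strings into the new .def.in templates, producing generated headers under the build tree. For a tree configured with LLVM_TARGETS_TO_BUILD="X86;ARM", the generated Targets.def is just a run of LLVM_TARGET(...) macro invocations, exactly as built in the foreach loop above; the consumer shown below is a minimal illustrative sketch, not part of this change:

    /* Generated include/llvm/Config/Targets.def (sketch for X86;ARM) */
    LLVM_TARGET(X86)
    LLVM_TARGET(ARM)

    /* A client defines LLVM_TARGET before including the file, e.g. to
       declare one initialization function per configured backend: */
    #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target();
    #include "llvm/Config/Targets.def"
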
diff --git a/Makefile b/Makefile
index 123595dc55ec..e750889ae45c 100644
--- a/Makefile
+++ b/Makefile
@@ -134,6 +134,8 @@ install-libs: install
#------------------------------------------------------------------------
FilesToConfig := \
include/llvm/Config/config.h \
+ include/llvm/Config/Targets.def \
+ include/llvm/Config/AsmPrinters.def \
include/llvm/Support/DataTypes.h \
include/llvm/ADT/iterator.h
FilesToConfigPATH := $(addprefix $(LLVM_OBJ_ROOT)/,$(FilesToConfig))
diff --git a/Makefile.rules b/Makefile.rules
index 4a77bf536dff..9325ca4b5d1b 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -287,10 +287,8 @@ endif
ifdef ENABLE_COVERAGE
BuildMode := $(BuildMode)+Coverage
- # These only go to .NoRelink because otherwise we will end up
- # linking -lgcov into the .o libraries that get built.
- CXX.Flags.NoRelink += -ftest-coverage -fprofile-arcs
- C.Flags.NoRelink += -ftest-coverage -fprofile-arcs
+ CXX.Flags += -ftest-coverage -fprofile-arcs
+ C.Flags += -ftest-coverage -fprofile-arcs
endif
# If DISABLE_ASSERTIONS=1 is specified (make command line or configured),
@@ -315,7 +313,6 @@ endif
# defined/on.
ifdef LOADABLE_MODULE
SHARED_LIBRARY := 1
- DONT_BUILD_RELINKED := 1
LINK_LIBS_IN_SHARED := 1
endif
@@ -522,10 +519,10 @@ endif
#----------------------------------------------------------
ifndef NO_PEDANTIC
-CompileCommonOpts += -pedantic
+CompileCommonOpts += -pedantic -Wno-long-long
endif
-CompileCommonOpts += -Wall -W -Wwrite-strings -Wno-long-long \
- -Wunused -Wno-unused-parameter $(EXTRA_OPTIONS)
+CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
+ $(EXTRA_OPTIONS)
ifeq ($(OS),HP-UX)
CompileCommonOpts := -D_REENTRANT -D_HPUX_SOURCE
@@ -548,10 +545,8 @@ ifdef UNIVERSAL
endif
UNIVERSAL_ARCH_OPTIONS := $(UNIVERSAL_ARCH:%=-arch %)
CompileCommonOpts += $(UNIVERSAL_ARCH_OPTIONS)
- Relink.Flags := $(UNIVERSAL_ARCH_OPTIONS)
ifdef UNIVERSAL_SDK_PATH
CompileCommonOpts += -isysroot $(UNIVERSAL_SDK_PATH)
- Relink.Flags += -isysroot $(UNIVERSAL_SDK_PATH)
endif
# Building universal cannot compute dependencies automatically.
@@ -582,27 +577,23 @@ CPP.Flags += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \
$(CPP.BaseFlags)
ifeq ($(BUILD_COMPONENT), 1)
- Compile.C = $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(C.Flags.NoRelink) \
+ Compile.C = $(BUILD_CC) $(CPP.Flags) $(C.Flags) \
$(TargetCommonOpts) $(CompileCommonOpts) -c
- Compile.CXX = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXX.Flags.NoRelink) \
+ Compile.CXX = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) \
$(TargetCommonOpts) $(CompileCommonOpts) -c
Preprocess.CXX= $(BUILD_CXX) $(CPP.Flags) $(TargetCommonOpts) \
- $(CompileCommonOpts) $(CXX.Flags) $(CXX.Flags.NoRelink) -E
- Link = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXX.Flags.NoRelink) \
+ $(CompileCommonOpts) $(CXX.Flags) -E
+ Link = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) \
$(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip)
- Relink = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) \
- $(CompileCommonOpts) $(Relink.Flags)
else
- Compile.C = $(CC) $(CPP.Flags) $(C.Flags) $(C.Flags.NoRelink) \
+ Compile.C = $(CC) $(CPP.Flags) $(C.Flags) \
$(TargetCommonOpts) $(CompileCommonOpts) -c
- Compile.CXX = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXX.Flags.NoRelink) \
+ Compile.CXX = $(CXX) $(CPP.Flags) $(CXX.Flags) \
$(TargetCommonOpts) $(CompileCommonOpts) -c
Preprocess.CXX= $(CXX) $(CPP.Flags) $(TargetCommonOpts) \
- $(CompileCommonOpts) $(CXX.Flags) $(CXX.Flags.NoRelink) -E
- Link = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXX.Flags.NoRelink) \
+ $(CompileCommonOpts) $(CXX.Flags) -E
+ Link = $(CXX) $(CPP.Flags) $(CXX.Flags) \
$(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip)
- Relink = $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) \
- $(CompileCommonOpts) $(Relink.Flags)
endif
BCCompile.C = $(LLVMGCCWITHPATH) $(CPP.Flags) $(C.Flags) \
@@ -1048,48 +1039,13 @@ endif
endif
#---------------------------------------------------------
-# ReLinked Library Targets:
-# If the user explicitly requests a relinked library with
-# BUILD_RELINKED, provide it. Otherwise, if they specify
-# neither of BUILD_ARCHIVE or DONT_BUILD_RELINKED, give
-# them one.
+# Library Targets:
+# If neither BUILD_ARCHIVE or LOADABLE_MODULE are specified, default to
+# building an archive.
#---------------------------------------------------------
ifndef BUILD_ARCHIVE
-ifndef DONT_BUILD_RELINKED
-BUILD_RELINKED = 1
-endif
-endif
-
-ifdef BUILD_RELINKED
-
-all-local:: $(LibName.O)
-
-$(LibName.O): $(ObjectsO) $(LibDir)/.dir
- $(Echo) Linking $(BuildMode) Object Library $(notdir $@)
- $(Verb) $(Relink) -r -nodefaultlibs -nostdlib -nostartfiles -o $@ $(ObjectsO)
-
-clean-local::
-ifneq ($(strip $(LibName.O)),)
- -$(Verb) $(RM) -f $(LibName.O)
-endif
-
-ifdef NO_INSTALL
-install-local::
- $(Echo) Install circumvented with NO_INSTALL
-uninstall-local::
- $(Echo) Uninstall circumvented with NO_INSTALL
-else
-DestRelinkedLib = $(PROJ_libdir)/$(LIBRARYNAME).o
-
-install-local:: $(DestRelinkedLib)
-
-$(DestRelinkedLib): $(LibName.O) $(PROJ_libdir)
- $(Echo) Installing $(BuildMode) Object Library $(DestRelinkedLib)
- $(Verb) $(INSTALL) $(LibName.O) $(DestRelinkedLib)
-
-uninstall-local::
- $(Echo) Uninstalling $(BuildMode) Object Library $(DestRelinkedLib)
- -$(Verb) $(RM) -f $(DestRelinkedLib)
+ifndef LOADABLE_MODULE
+BUILD_ARCHIVE = 1
endif
endif
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 64b78a49eb6d..bd05891a9dc7 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -234,6 +234,13 @@ if test "$llvm_cv_target_arch" = "Unknown" ; then
AC_MSG_WARN([Configuring LLVM for an unknown target archicture])
fi
+# Determine the LLVM native architecture for the target
+case "$llvm_cv_target_arch" in
+ x86) LLVM_NATIVE_ARCH="X86" ;;
+ x86_64) LLVM_NATIVE_ARCH="X86" ;;
+ *) LLVM_NATIVE_ARCH="$llvm_cv_target_arch" ;;
+esac
+
dnl Define a substitution, ARCH, for the target architecture
AC_SUBST(ARCH,$llvm_cv_target_arch)
@@ -436,6 +443,28 @@ case "$enableval" in
esac
AC_SUBST(TARGETS_TO_BUILD,$TARGETS_TO_BUILD)
+# Determine whether we are building LLVM support for the native architecture.
+# If so, define LLVM_NATIVE_ARCH to that LLVM target.
+for a_target in $TARGETS_TO_BUILD; do
+ if test "$a_target" = "$LLVM_NATIVE_ARCH"; then
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_ARCH,$LLVM_NATIVE_ARCH,
+ [LLVM architecture name for the native architecture, if available])
+ fi
+done
+
+# Build the LLVM_TARGET and LLVM_ASM_PRINTER macro uses for
+# Targets.def and AsmPrinters.def.
+LLVM_ENUM_TARGETS=""
+LLVM_ENUM_ASM_PRINTERS=""
+for target_to_build in $TARGETS_TO_BUILD; do
+ LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS"
+ if test -f ${srcdir}/lib/Target/${target_to_build}/AsmPrinter/Makefile ; then
+ LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS";
+ fi
+done
+AC_SUBST(LLVM_ENUM_TARGETS)
+AC_SUBST(LLVM_ENUM_ASM_PRINTERS)
+
dnl Prevent the CBackend from using printf("%a") for floating point so older
dnl C compilers that cannot deal with the 0x0p+0 hex floating point format
dnl can still compile the CBE's output
@@ -796,6 +825,9 @@ if test "$ENABLE_THREADS" -eq 1 ; then
AC_SEARCH_LIBS(pthread_mutex_lock,pthread,
AC_DEFINE([HAVE_PTHREAD_MUTEX_LOCK],[1],
[Have pthread_mutex_lock]))
+ AC_SEARCH_LIBS(pthread_rwlock_init,pthread,
+ AC_DEFINE([HAVE_PTHREAD_RWLOCK_INIT],[1],
+ [Have pthread_rwlock_init]))
fi
dnl Allow extra x86-disassembler library
@@ -919,6 +951,8 @@ AC_LINK_IFELSE(
volatile unsigned long val = 1;
__sync_synchronize();
__sync_val_compare_and_swap(&val, 1, 0);
+ __sync_add_and_fetch(&val, 1);
+ __sync_sub_and_fetch(&val, 1);
return 0;
}
]]),
@@ -1108,6 +1142,8 @@ dnl you MUST also update Makefile.rules so that the variable FilesToConfig
dnl contains the same list of files as AC_CONFIG_HEADERS below. This ensures the
dnl files can be updated automatically when their *.in sources change.
AC_CONFIG_HEADERS([include/llvm/Config/config.h])
+AC_CONFIG_FILES([include/llvm/Config/Targets.def])
+AC_CONFIG_FILES([include/llvm/Config/AsmPrinters.def])
AC_CONFIG_HEADERS([include/llvm/Support/DataTypes.h])
AC_CONFIG_HEADERS([include/llvm/ADT/iterator.h])
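
The two AC_CONFIG_FILES entries pair with the new templates listed in the diffstat (include/llvm/Config/Targets.def.in and AsmPrinters.def.in), into which config.status substitutes the @LLVM_ENUM_TARGETS@ and @LLVM_ENUM_ASM_PRINTERS@ values assembled above. A plausible shape for Targets.def.in, shown only as a sketch (the guard and comments are assumptions, not copied from the commit):

    /* llvm/Config/Targets.def.in - list of configured targets (sketch) */
    #ifndef LLVM_TARGET
    #  error Please define the macro LLVM_TARGET(TargetName)
    #endif

    @LLVM_ENUM_TARGETS@

    #undef LLVM_TARGET
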
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index c18ac44172f9..c460fa2aa058 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -78,6 +78,50 @@ include(GetTargetTriple)
get_target_triple(LLVM_HOSTTRIPLE)
message(STATUS "LLVM_HOSTTRIPLE: ${LLVM_HOSTTRIPLE}")
+# Determine the native architecture.
+# FIXME: this will have to change for cross-compiling.
+string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOSTTRIPLE})
+if (LLVM_NATIVE_ARCH MATCHES "i[2-6]86")
+ set(LLVM_NATIVE_ARCH X86)
+elseif (LLVM_NATIVE_ARCH STREQUAL amd64)
+ set(LLVM_NATIVE_ARCH X86)
+elseif (LLVM_NATIVE_ARCH STREQUAL x86_64)
+ set(LLVM_NATIVE_ARCH X86)
+elseif (LLVM_NATIVE_ARCH MATCHES "sparc")
+ set(LLVM_NATIVE_ARCH Sparc)
+elseif (LLVM_NATIVE_ARCH MATCHES "powerpc")
+ set(LLVM_NATIVE_ARCH PowerPC)
+elseif (LLVM_NATIVE_ARCH MATCHES "alpha")
+ set(LLVM_NATIVE_ARCH Alpha)
+elseif (LLVM_NATIVE_ARCH MATCHES "ia64")
+ set(LLVM_NATIVE_ARCH IA64)
+elseif (LLVM_NATIVE_ARCH MATCHES "arm")
+ set(LLVM_NATIVE_ARCH ARM)
+elseif (LLVM_NATIVE_ARCH MATCHES "mips")
+ set(LLVM_NATIVE_ARCH Mips)
+elseif (LLVM_NATIVE_ARCH MATCHES "pic16")
+ set(LLVM_NATIVE_ARCH "PIC16")
+elseif (LLVM_NATIVE_ARCH MATCHES "xcore")
+ set(LLVM_NATIVE_ARCH XCore)
+elseif (LLVM_NATIVE_ARCH MATCHES "msp430")
+ set(LLVM_NATIVE_ARCH MSP430)
+else ()
+ message(STATUS
+ "Unknown architecture ${LLVM_NATIVE_ARCH}; lli will not JIT code")
+ set(LLVM_NATIVE_ARCH)
+endif ()
+
+if (LLVM_NATIVE_ARCH)
+ list(FIND LLVM_TARGETS_TO_BUILD ${LLVM_NATIVE_ARCH} NATIVE_ARCH_IDX)
+ if (NATIVE_ARCH_IDX EQUAL -1)
+ message(STATUS
+ "Native target ${LLVM_NATIVE_ARCH} is not selected; lli will not JIT code")
+ set(LLVM_NATIVE_ARCH)
+ else ()
+ message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}")
+ endif ()
+endif()
+
if( MINGW )
set(HAVE_LIBIMAGEHLP 1)
set(HAVE_LIBPSAPI 1)
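
Both build systems now agree on a single LLVM_NATIVE_ARCH symbol, whose main consumer is the new include/llvm/Target/TargetSelect.h (65 lines in the diffstat). A hedged sketch of how such a header typically uses the macro; the double token-pasting indirection forces LLVM_NATIVE_ARCH to expand before pasting, and the exact contents of the real header are not reproduced here:

    // Sketch only: initialize the native target, if one was configured in.
    #include "llvm/Config/config.h"

    extern "C" {
      // One declaration per configured backend, expanded from Targets.def.
    #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target();
    #include "llvm/Config/Targets.def"
    }

    namespace llvm {
      inline bool InitializeNativeTarget() {
    #ifdef LLVM_NATIVE_ARCH
    #define DoInit2(TargetName) LLVMInitialize##TargetName##Target()
    #define DoInit(TargetName)  DoInit2(TargetName)
        DoInit(LLVM_NATIVE_ARCH);   // e.g. LLVMInitializeX86Target()
    #undef DoInit
    #undef DoInit2
        return false;               // success: a native target is linked in
    #else
        return true;                // failure: no native target was built
    #endif
      }
    }
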
diff --git a/configure b/configure
index 9a30c297cd37..3c9925153abe 100755
--- a/configure
+++ b/configure
@@ -841,6 +841,8 @@ ENABLE_DOXYGEN
ENABLE_THREADS
ENABLE_PIC
TARGETS_TO_BUILD
+LLVM_ENUM_TARGETS
+LLVM_ENUM_ASM_PRINTERS
ENABLE_CBE_PRINTF_A
EXTRA_OPTIONS
BINUTILS_INCDIR
@@ -2401,6 +2403,13 @@ if test "$llvm_cv_target_arch" = "Unknown" ; then
echo "$as_me: WARNING: Configuring LLVM for an unknown target archicture" >&2;}
fi
+# Determine the LLVM native architecture for the target
+case "$llvm_cv_target_arch" in
+ x86) LLVM_NATIVE_ARCH="X86" ;;
+ x86_64) LLVM_NATIVE_ARCH="X86" ;;
+ *) LLVM_NATIVE_ARCH="$llvm_cv_target_arch" ;;
+esac
+
ARCH=$llvm_cv_target_arch
@@ -4959,6 +4968,31 @@ esac
TARGETS_TO_BUILD=$TARGETS_TO_BUILD
+# Determine whether we are building LLVM support for the native architecture.
+# If so, define LLVM_NATIVE_ARCH to that LLVM target.
+for a_target in $TARGETS_TO_BUILD; do
+ if test "$a_target" = "$LLVM_NATIVE_ARCH"; then
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_ARCH $LLVM_NATIVE_ARCH
+_ACEOF
+
+ fi
+done
+
+# Build the LLVM_TARGET and LLVM_ASM_PRINTER macro uses for
+# Targets.def and AsmPrinters.def.
+LLVM_ENUM_TARGETS=""
+LLVM_ENUM_ASM_PRINTERS=""
+for target_to_build in $TARGETS_TO_BUILD; do
+ LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS"
+ if test -f ${srcdir}/lib/Target/${target_to_build}/AsmPrinter/Makefile ; then
+ LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS";
+ fi
+done
+
+
+
# Check whether --enable-cbe-printf-a was given.
if test "${enable_cbe_printf_a+set}" = set; then
enableval=$enable_cbe_printf_a;
@@ -10594,7 +10628,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10597 "configure"
+#line 10631 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -12738,7 +12772,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 12741 "configure"' > conftest.$ac_ext
+ echo '#line 12775 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -14456,11 +14490,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14459: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14493: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14463: \$? = $ac_status" >&5
+ echo "$as_me:14497: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -14724,11 +14758,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14727: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14761: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14731: \$? = $ac_status" >&5
+ echo "$as_me:14765: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -14828,11 +14862,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14831: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14865: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:14835: \$? = $ac_status" >&5
+ echo "$as_me:14869: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -17280,7 +17314,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17283 "configure"
+#line 17317 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -17380,7 +17414,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 17383 "configure"
+#line 17417 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -19748,11 +19782,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:19751: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:19785: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:19755: \$? = $ac_status" >&5
+ echo "$as_me:19789: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -19852,11 +19886,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:19855: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:19889: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:19859: \$? = $ac_status" >&5
+ echo "$as_me:19893: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -21422,11 +21456,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21425: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21459: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:21429: \$? = $ac_status" >&5
+ echo "$as_me:21463: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -21526,11 +21560,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:21529: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:21563: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:21533: \$? = $ac_status" >&5
+ echo "$as_me:21567: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -23761,11 +23795,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:23764: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:23798: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:23768: \$? = $ac_status" >&5
+ echo "$as_me:23802: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24029,11 +24063,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24032: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24066: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:24036: \$? = $ac_status" >&5
+ echo "$as_me:24070: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -24133,11 +24167,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:24136: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:24170: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:24140: \$? = $ac_status" >&5
+ echo "$as_me:24174: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -27896,6 +27930,109 @@ _ACEOF
fi
+ { echo "$as_me:$LINENO: checking for library containing pthread_rwlock_init" >&5
+echo $ECHO_N "checking for library containing pthread_rwlock_init... $ECHO_C" >&6; }
+if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_rwlock_init ();
+int
+main ()
+{
+return pthread_rwlock_init ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' pthread; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_pthread_rwlock_init=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then
+ :
+else
+ ac_cv_search_pthread_rwlock_init=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_pthread_rwlock_init" >&5
+echo "${ECHO_T}$ac_cv_search_pthread_rwlock_init" >&6; }
+ac_res=$ac_cv_search_pthread_rwlock_init
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PTHREAD_RWLOCK_INIT 1
+_ACEOF
+
+fi
+
fi
@@ -33642,6 +33779,8 @@ int main() {
volatile unsigned long val = 1;
__sync_synchronize();
__sync_val_compare_and_swap(&val, 1, 0);
+ __sync_add_and_fetch(&val, 1);
+ __sync_sub_and_fetch(&val, 1);
return 0;
}
@@ -34120,6 +34259,10 @@ fi
ac_config_headers="$ac_config_headers include/llvm/Config/config.h"
+ac_config_files="$ac_config_files include/llvm/Config/Targets.def"
+
+ac_config_files="$ac_config_files include/llvm/Config/AsmPrinters.def"
+
ac_config_headers="$ac_config_headers include/llvm/Support/DataTypes.h"
ac_config_headers="$ac_config_headers include/llvm/ADT/iterator.h"
@@ -34746,6 +34889,8 @@ for ac_config_target in $ac_config_targets
do
case $ac_config_target in
"include/llvm/Config/config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Config/config.h" ;;
+ "include/llvm/Config/Targets.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Targets.def" ;;
+ "include/llvm/Config/AsmPrinters.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmPrinters.def" ;;
"include/llvm/Support/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Support/DataTypes.h" ;;
"include/llvm/ADT/iterator.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/ADT/iterator.h" ;;
"Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;;
@@ -34914,6 +35059,8 @@ ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
ENABLE_THREADS!$ENABLE_THREADS$ac_delim
ENABLE_PIC!$ENABLE_PIC$ac_delim
TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim
+LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim
+LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim
ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim
EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
@@ -34924,8 +35071,6 @@ NM!$NM$ac_delim
ifGNUmake!$ifGNUmake$ac_delim
LN_S!$LN_S$ac_delim
CMP!$CMP$ac_delim
-CP!$CP$ac_delim
-DATE!$DATE$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -34967,6 +35112,8 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+CP!$CP$ac_delim
+DATE!$DATE$ac_delim
FIND!$FIND$ac_delim
MKDIR!$MKDIR$ac_delim
MV!$MV$ac_delim
@@ -35048,7 +35195,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 79; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 81; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index 7fada98278d4..25101fc31bf6 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -1773,6 +1773,8 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
<li><b>i386-pc-mingw32msvc</b> &mdash; MingW crosscompiler on Linux</li>
<li><b>i686-apple-darwin*</b> &mdash; Apple Darwin on X86</li>
+
+ <li><b>x86_64-unknown-linux-gnu</b> &mdash; Linux</li>
</ul>
</div>
@@ -2116,7 +2118,7 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-05-13 23:33:08 +0200 (Wed, 13 May 2009) $
+ Last modified: $Date: 2009-06-15 12:17:44 +0000 (Mon, 15 Jun 2009) $
</address>
</body>
diff --git a/docs/CommandLine.html b/docs/CommandLine.html
index c4e567b43020..d6cf48ce5111 100644
--- a/docs/CommandLine.html
+++ b/docs/CommandLine.html
@@ -1116,9 +1116,9 @@ command-line parser sees <b><tt>cl::init</tt></b>, it knows where to put the
initial value. (You will get an error at runtime if you don't put them in
the right order.)</li>
-<li><a name="cl::location">The <b><tt>cl::location</tt></b></a> attribute where to
-store the value for a parsed command line option if using external storage. See
-the section on <a href="#storage">Internal vs External Storage</a> for more
+<li><a name="cl::location">The <b><tt>cl::location</tt></b></a> attribute where
+to store the value for a parsed command line option if using external storage.
+See the section on <a href="#storage">Internal vs External Storage</a> for more
information.</li>
<li><a name="cl::aliasopt">The <b><tt>cl::aliasopt</tt></b></a> attribute
@@ -1146,6 +1146,11 @@ specify macro options where the option name doesn't equal the enum name. For
this macro, the first argument is the enum value, the second is the flag name,
and the second is the description.</li>
+</ol>
+
+You will get a compile time error if you try to use cl::values with a parser
+that does not support it.</li>
+
<li><a name="cl::multi_val">The <b><tt>cl::multi_val</tt></b></a>
attribute specifies that this option takes has multiple values
(example: <tt>-sectalign segname sectname sectvalue</tt>). This
@@ -1156,12 +1161,6 @@ types). It is allowed to use all of the usual modifiers on
multi-valued options (besides <tt>cl::ValueDisallowed</tt>,
obviously).</li>
-
-</ol>
-
-You will get a compile time error if you try to use cl::values with a parser
-that does not support it.</li>
-
</ul>
</div>
@@ -1973,7 +1972,7 @@ tutorial.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-04-08 05:43:51 +0200 (Wed, 08 Apr 2009) $
+ Last modified: $Date: 2009-06-17 03:09:39 +0000 (Wed, 17 Jun 2009) $
</address>
</body>
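
The cl::values text that the two hunks above move around is easier to follow next to a concrete use. A minimal sketch in the CommandLine style of this era (the enum and option names are illustrative, not from the commit); clEnumVal reuses the enum token as the flag name, while clEnumValN supplies a separate flag name, which is the distinction the moved paragraph describes:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    enum OptLevel { g, O1, O2, O3 };

    // cl::values maps each enum constant to a flag name plus help text;
    // using it with a parser that has no such mapping is rejected at
    // compile time, as the surrounding text notes.
    static cl::opt<OptLevel> OptimizationLevel(
        cl::desc("Choose optimization level:"),
        cl::values(
            clEnumVal(g,  "No optimizations, enable debugging"),
            clEnumValN(O1, "O1", "Enable trivial optimizations"),
            clEnumVal(O2, "Enable default optimizations"),
            clEnumVal(O3, "Enable expensive optimizations"),
            clEnumValEnd));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      return 0;
    }
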
diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html
index ab436974b531..5b42148bdb3b 100644
--- a/docs/CompilerDriver.html
+++ b/docs/CompilerDriver.html
@@ -21,21 +21,22 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->
<li><a class="reference internal" href="#compiling-with-llvmc" id="id5">Compiling with LLVMC</a></li>
<li><a class="reference internal" href="#predefined-options" id="id6">Predefined options</a></li>
<li><a class="reference internal" href="#compiling-llvmc-plugins" id="id7">Compiling LLVMC plugins</a></li>
-<li><a class="reference internal" href="#customizing-llvmc-the-compilation-graph" id="id8">Customizing LLVMC: the compilation graph</a></li>
-<li><a class="reference internal" href="#describing-options" id="id9">Describing options</a><ul>
-<li><a class="reference internal" href="#external-options" id="id10">External options</a></li>
+<li><a class="reference internal" href="#compiling-standalone-llvmc-based-drivers" id="id8">Compiling standalone LLVMC-based drivers</a></li>
+<li><a class="reference internal" href="#customizing-llvmc-the-compilation-graph" id="id9">Customizing LLVMC: the compilation graph</a></li>
+<li><a class="reference internal" href="#describing-options" id="id10">Describing options</a><ul>
+<li><a class="reference internal" href="#external-options" id="id11">External options</a></li>
</ul>
</li>
-<li><a class="reference internal" href="#conditional-evaluation" id="id11">Conditional evaluation</a></li>
-<li><a class="reference internal" href="#writing-a-tool-description" id="id12">Writing a tool description</a><ul>
-<li><a class="reference internal" href="#actions" id="id13">Actions</a></li>
+<li><a class="reference internal" href="#conditional-evaluation" id="id12">Conditional evaluation</a></li>
+<li><a class="reference internal" href="#writing-a-tool-description" id="id13">Writing a tool description</a><ul>
+<li><a class="reference internal" href="#actions" id="id14">Actions</a></li>
</ul>
</li>
-<li><a class="reference internal" href="#language-map" id="id14">Language map</a></li>
-<li><a class="reference internal" href="#more-advanced-topics" id="id15">More advanced topics</a><ul>
-<li><a class="reference internal" href="#hooks-and-environment-variables" id="id16">Hooks and environment variables</a></li>
-<li><a class="reference internal" href="#how-plugins-are-loaded" id="id17">How plugins are loaded</a></li>
-<li><a class="reference internal" href="#debugging" id="id18">Debugging</a></li>
+<li><a class="reference internal" href="#language-map" id="id15">Language map</a></li>
+<li><a class="reference internal" href="#more-advanced-topics" id="id16">More advanced topics</a><ul>
+<li><a class="reference internal" href="#hooks-and-environment-variables" id="id17">Hooks and environment variables</a></li>
+<li><a class="reference internal" href="#how-plugins-are-loaded" id="id18">How plugins are loaded</a></li>
+<li><a class="reference internal" href="#debugging" id="id19">Debugging</a></li>
</ul>
</li>
</ul>
@@ -56,7 +57,7 @@ abstract graph. The structure of this graph is completely determined
by plugins, which can be either statically or dynamically linked. This
makes it possible to easily adapt LLVMC for other purposes - for
example, as a build tool for game resources.</p>
-<p>Because LLVMC employs <a class="reference external" href="http://llvm.cs.uiuc.edu/docs/TableGenFundamentals.html">TableGen</a> as its configuration language, you
+<p>Because LLVMC employs <a class="reference external" href="http://llvm.org/docs/TableGenFundamentals.html">TableGen</a> as its configuration language, you
need to be familiar with it to customize LLVMC.</p>
</div>
<div class="section" id="compiling-with-llvmc">
@@ -70,12 +71,12 @@ $ llvmc -O3 -Wall hello.cpp
$ ./a.out
hello
</pre>
-<p>One nice feature of LLVMC is that one doesn't have to distinguish
-between different compilers for different languages (think <tt class="docutils literal"><span class="pre">g++</span></tt> and
-<tt class="docutils literal"><span class="pre">gcc</span></tt>) - the right toolchain is chosen automatically based on input
-language names (which are, in turn, determined from file
-extensions). If you want to force files ending with &quot;.c&quot; to compile as
-C++, use the <tt class="docutils literal"><span class="pre">-x</span></tt> option, just like you would do it with <tt class="docutils literal"><span class="pre">gcc</span></tt>:</p>
+<p>One nice feature of LLVMC is that one doesn't have to distinguish between
+different compilers for different languages (think <tt class="docutils literal"><span class="pre">g++</span></tt> vs. <tt class="docutils literal"><span class="pre">gcc</span></tt>) - the
+right toolchain is chosen automatically based on input language names (which
+are, in turn, determined from file extensions). If you want to force files
+ending with &quot;.c&quot; to compile as C++, use the <tt class="docutils literal"><span class="pre">-x</span></tt> option, just like you would
+do it with <tt class="docutils literal"><span class="pre">gcc</span></tt>:</p>
<pre class="literal-block">
$ # hello.c is really a C++ file
$ llvmc -x c++ hello.c
@@ -110,16 +111,17 @@ until the next -x option.</li>
<li><tt class="docutils literal"><span class="pre">-v</span></tt> - Enable verbose mode, i.e. print out all executed commands.</li>
<li><tt class="docutils literal"><span class="pre">--check-graph</span></tt> - Check the compilation for common errors like mismatched
output/input language names, multiple default edges and cycles. Because of
-plugins, these checks can't be performed at compile-time. Exit with code zero if
-no errors were found, and return the number of found errors otherwise. Hidden
-option, useful for debugging LLVMC plugins.</li>
+plugins, these checks can't be performed at compile-time. Exit with code zero
+if no errors were found, and return the number of found errors
+otherwise. Hidden option, useful for debugging LLVMC plugins.</li>
<li><tt class="docutils literal"><span class="pre">--view-graph</span></tt> - Show a graphical representation of the compilation graph
and exit. Requires that you have <tt class="docutils literal"><span class="pre">dot</span></tt> and <tt class="docutils literal"><span class="pre">gv</span></tt> programs installed. Hidden
option, useful for debugging LLVMC plugins.</li>
<li><tt class="docutils literal"><span class="pre">--write-graph</span></tt> - Write a <tt class="docutils literal"><span class="pre">compilation-graph.dot</span></tt> file in the current
directory with the compilation graph description in Graphviz format (identical
-to the file used by the <tt class="docutils literal"><span class="pre">--view-graph</span></tt> option). The <tt class="docutils literal"><span class="pre">-o</span></tt> option can be used
-to set the output file name. Hidden option, useful for debugging LLVMC plugins.</li>
+to the file used by the <tt class="docutils literal"><span class="pre">--view-graph</span></tt> option). The <tt class="docutils literal"><span class="pre">-o</span></tt> option can be
+used to set the output file name. Hidden option, useful for debugging LLVMC
+plugins.</li>
<li><tt class="docutils literal"><span class="pre">--save-temps</span></tt> - Write temporary files to the current directory
and do not delete them on exit. Hidden option, useful for debugging.</li>
<li><tt class="docutils literal"><span class="pre">--help</span></tt>, <tt class="docutils literal"><span class="pre">--help-hidden</span></tt>, <tt class="docutils literal"><span class="pre">--version</span></tt> - These options have
@@ -154,33 +156,58 @@ generic:</p>
<pre class="literal-block">
$ mv Simple.td MyPlugin.td
</pre>
-<p>Note that the plugin source directory must be placed under
-<tt class="docutils literal"><span class="pre">$LLVMC_DIR/plugins</span></tt> to make use of the existing build
-infrastructure. To build a version of the LLVMC executable called
-<tt class="docutils literal"><span class="pre">mydriver</span></tt> with your plugin compiled in, use the following command:</p>
-<pre class="literal-block">
-$ cd $LLVMC_DIR
-$ make BUILTIN_PLUGINS=MyPlugin DRIVER_NAME=mydriver
-</pre>
<p>To build your plugin as a dynamic library, just <tt class="docutils literal"><span class="pre">cd</span></tt> to its source
directory and run <tt class="docutils literal"><span class="pre">make</span></tt>. The resulting file will be called
-<tt class="docutils literal"><span class="pre">LLVMC$(LLVMC_PLUGIN).$(DLL_EXTENSION)</span></tt> (in our case,
-<tt class="docutils literal"><span class="pre">LLVMCMyPlugin.so</span></tt>). This library can be then loaded in with the
+<tt class="docutils literal"><span class="pre">plugin_llvmc_$(LLVMC_PLUGIN).$(DLL_EXTENSION)</span></tt> (in our case,
+<tt class="docutils literal"><span class="pre">plugin_llvmc_MyPlugin.so</span></tt>). This library can be then loaded in with the
<tt class="docutils literal"><span class="pre">-load</span></tt> option. Example:</p>
<pre class="literal-block">
$ cd $LLVMC_DIR/plugins/Simple
$ make
-$ llvmc -load $LLVM_DIR/Release/lib/LLVMCSimple.so
+$ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so
+</pre>
+</div>
+<div class="section" id="compiling-standalone-llvmc-based-drivers">
+<h1><a class="toc-backref" href="#id8">Compiling standalone LLVMC-based drivers</a></h1>
+<p>By default, the <tt class="docutils literal"><span class="pre">llvmc</span></tt> executable consists of a driver core plus several
+statically linked plugins (<tt class="docutils literal"><span class="pre">Base</span></tt> and <tt class="docutils literal"><span class="pre">Clang</span></tt> at the moment). You can
+produce a standalone LLVMC-based driver executable by linking the core with your
+own plugins. The recommended way to do this is by starting with the provided
+<tt class="docutils literal"><span class="pre">Skeleton</span></tt> example (<tt class="docutils literal"><span class="pre">$LLVMC_DIR/example/Skeleton</span></tt>):</p>
+<pre class="literal-block">
+$ cd $LLVMC_DIR/example/
+$ cp -r Skeleton mydriver
+$ cd mydriver
+$ vim Makefile
+[...]
+$ make
+</pre>
+<p>If you're compiling LLVM with different source and object directories, then you
+must perform the following additional steps before running <tt class="docutils literal"><span class="pre">make</span></tt>:</p>
+<pre class="literal-block">
+# LLVMC_SRC_DIR = $LLVM_SRC_DIR/tools/llvmc/
+# LLVMC_OBJ_DIR = $LLVM_OBJ_DIR/tools/llvmc/
+$ cp $LLVMC_SRC_DIR/example/mydriver/Makefile \
+ $LLVMC_OBJ_DIR/example/mydriver/
+$ cd $LLVMC_OBJ_DIR/example/mydriver
+$ make
+</pre>
+<p>Another way to do the same thing is by using the following command:</p>
+<pre class="literal-block">
+$ cd $LLVMC_DIR
+$ make LLVMC_BUILTIN_PLUGINS=MyPlugin LLVMC_BASED_DRIVER_NAME=mydriver
</pre>
+<p>This works with both srcdir == objdir and srcdir != objdir, but assumes that the
+plugin source directory was placed under <tt class="docutils literal"><span class="pre">$LLVMC_DIR/plugins</span></tt>.</p>
<p>Sometimes, you will want a 'bare-bones' version of LLVMC that has no
built-in plugins. It can be compiled with the following command:</p>
<pre class="literal-block">
$ cd $LLVMC_DIR
-$ make BUILTIN_PLUGINS=&quot;&quot;
+$ make LLVMC_BUILTIN_PLUGINS=&quot;&quot;
</pre>
</div>
<div class="section" id="customizing-llvmc-the-compilation-graph">
-<h1><a class="toc-backref" href="#id8">Customizing LLVMC: the compilation graph</a></h1>
+<h1><a class="toc-backref" href="#id9">Customizing LLVMC: the compilation graph</a></h1>
<p>Each TableGen configuration file should include the common
definitions:</p>
<pre class="literal-block">
@@ -248,7 +275,7 @@ debugging), run <tt class="docutils literal"><span class="pre">llvmc</span> <spa
<tt class="docutils literal"><span class="pre">gsview</span></tt> installed for this to work properly.</p>
</div>
<div class="section" id="describing-options">
-<h1><a class="toc-backref" href="#id9">Describing options</a></h1>
+<h1><a class="toc-backref" href="#id10">Describing options</a></h1>
<p>Command-line options that the plugin supports are defined by using an
<tt class="docutils literal"><span class="pre">OptionList</span></tt>:</p>
<pre class="literal-block">
@@ -317,7 +344,7 @@ the <tt class="docutils literal"><span class="pre">one_or_more</span></tt> and <
</li>
</ul>
<div class="section" id="external-options">
-<h2><a class="toc-backref" href="#id10">External options</a></h2>
+<h2><a class="toc-backref" href="#id11">External options</a></h2>
<p>Sometimes, when linking several plugins together, one plugin needs to
access options defined in some other plugin. Because of the way
options are implemented, such options must be marked as
@@ -332,7 +359,7 @@ for. Example:</p>
</div>
</div>
<div class="section" id="conditional-evaluation">
-<span id="case"></span><h1><a class="toc-backref" href="#id11">Conditional evaluation</a></h1>
+<span id="case"></span><h1><a class="toc-backref" href="#id12">Conditional evaluation</a></h1>
<p>The 'case' construct is the main means by which programmability is
achieved in LLVMC. It can be used to calculate edge weights, program
actions and modify the shell commands to be executed. The 'case'
@@ -412,7 +439,7 @@ one of its arguments returns true. Example: <tt class="docutils literal"><span c
</ul>
</div>
<div class="section" id="writing-a-tool-description">
-<h1><a class="toc-backref" href="#id12">Writing a tool description</a></h1>
+<h1><a class="toc-backref" href="#id13">Writing a tool description</a></h1>
<p>As was said earlier, nodes in the compilation graph represent tools,
which are described separately. A tool definition looks like this
(taken from the <tt class="docutils literal"><span class="pre">include/llvm/CompilerDriver/Tools.td</span></tt> file):</p>
@@ -454,7 +481,7 @@ below).</li>
</li>
</ul>
<div class="section" id="actions">
-<h2><a class="toc-backref" href="#id13">Actions</a></h2>
+<h2><a class="toc-backref" href="#id14">Actions</a></h2>
<p>A tool often needs to react to command-line options, and this is
precisely what the <tt class="docutils literal"><span class="pre">actions</span></tt> property is for. The next example
illustrates this feature:</p>
@@ -515,7 +542,7 @@ Example: <tt class="docutils literal"><span class="pre">(unpack_values</span> <s
</div>
</div>
<div class="section" id="language-map">
-<h1><a class="toc-backref" href="#id14">Language map</a></h1>
+<h1><a class="toc-backref" href="#id15">Language map</a></h1>
<p>If you are adding support for a new language to LLVMC, you'll need to
modify the language map, which defines mappings from file extensions
to language names. It is used to choose the proper toolchain(s) for a
@@ -538,9 +565,9 @@ multiple output languages, for nodes &quot;inside&quot; the graph the input and
output languages should match. This is enforced at compile-time.</p>
</div>
<div class="section" id="more-advanced-topics">
-<h1><a class="toc-backref" href="#id15">More advanced topics</a></h1>
+<h1><a class="toc-backref" href="#id16">More advanced topics</a></h1>
<div class="section" id="hooks-and-environment-variables">
-<span id="hooks"></span><h2><a class="toc-backref" href="#id16">Hooks and environment variables</a></h2>
+<span id="hooks"></span><h2><a class="toc-backref" href="#id17">Hooks and environment variables</a></h2>
<p>Normally, LLVMC executes programs from the system <tt class="docutils literal"><span class="pre">PATH</span></tt>. Sometimes,
this is not sufficient: for example, we may want to specify tool paths
or names in the configuration file. This can be easily achieved via
@@ -573,7 +600,7 @@ the <tt class="docutils literal"><span class="pre">case</span></tt> expression (
</pre>
</div>
<div class="section" id="how-plugins-are-loaded">
-<span id="priorities"></span><h2><a class="toc-backref" href="#id17">How plugins are loaded</a></h2>
+<span id="priorities"></span><h2><a class="toc-backref" href="#id18">How plugins are loaded</a></h2>
<p>It is possible for LLVMC plugins to depend on each other. For example,
one can create edges between nodes defined in some other plugin. To
make this work, however, that plugin should be loaded first. To
@@ -589,7 +616,7 @@ with 0. Therefore, the plugin with the highest priority value will be
loaded last.</p>
</div>
<div class="section" id="debugging">
-<h2><a class="toc-backref" href="#id18">Debugging</a></h2>
+<h2><a class="toc-backref" href="#id19">Debugging</a></h2>
<p>When writing LLVMC plugins, it can be useful to get a visual view of
the resulting compilation graph. This can be achieved via the command
line option <tt class="docutils literal"><span class="pre">--view-graph</span></tt>. This command assumes that <a class="reference external" href="http://www.graphviz.org/">Graphviz</a> and
@@ -615,7 +642,7 @@ errors as its status code.</p>
<a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a><br />
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br />
-Last modified: $Date: 2009-05-06 03:41:47 +0200 (Wed, 06 May 2009) $
+Last modified: $Date: 2009-06-17 02:56:48 +0000 (Wed, 17 Jun 2009) $
</address></div>
</div>
</div>
diff --git a/docs/CompilerDriverTutorial.html b/docs/CompilerDriverTutorial.html
index a57801737957..317b1d127127 100644
--- a/docs/CompilerDriverTutorial.html
+++ b/docs/CompilerDriverTutorial.html
@@ -48,23 +48,28 @@ command-line LLVMC usage, refer to the <tt class="docutils literal"><span class=
</div>
<div class="section" id="using-llvmc-to-generate-toolchain-drivers">
<h1><a class="toc-backref" href="#id3">Using LLVMC to generate toolchain drivers</a></h1>
-<p>LLVMC plugins are written mostly using <a class="reference external" href="http://llvm.cs.uiuc.edu/docs/TableGenFundamentals.html">TableGen</a>, so you need to
+<p>LLVMC plugins are written mostly using <a class="reference external" href="http://llvm.org/docs/TableGenFundamentals.html">TableGen</a>, so you need to
be familiar with it to get anything done.</p>
-<p>Start by compiling <tt class="docutils literal"><span class="pre">plugins/Simple/Simple.td</span></tt>, which is a primitive
-wrapper for <tt class="docutils literal"><span class="pre">gcc</span></tt>:</p>
+<p>Start by compiling <tt class="docutils literal"><span class="pre">example/Simple</span></tt>, which is a primitive wrapper for
+<tt class="docutils literal"><span class="pre">gcc</span></tt>:</p>
<pre class="literal-block">
$ cd $LLVM_DIR/tools/llvmc
-$ make DRIVER_NAME=mygcc BUILTIN_PLUGINS=Simple
+$ cp -r example/Simple plugins/Simple
+
+ # NB: A less verbose way to compile standalone LLVMC-based drivers is
+ # described in the reference manual.
+
+$ make LLVMC_BASED_DRIVER_NAME=mygcc LLVMC_BUILTIN_PLUGINS=Simple
$ cat &gt; hello.c
[...]
$ mygcc hello.c
$ ./hello.out
Hello
</pre>
-<p>Here we link our plugin with the LLVMC core statically to form an
-executable file called <tt class="docutils literal"><span class="pre">mygcc</span></tt>. It is also possible to build our
-plugin as a standalone dynamic library; this is described in the
-reference manual.</p>
+<p>Here we link our plugin with the LLVMC core statically to form an executable
+file called <tt class="docutils literal"><span class="pre">mygcc</span></tt>. It is also possible to build our plugin as a dynamic
+library to be loaded by the <tt class="docutils literal"><span class="pre">llvmc</span></tt> executable (or any other LLVMC-based
+standalone driver); this is described in the reference manual.</p>
<p>Contents of the file <tt class="docutils literal"><span class="pre">Simple.td</span></tt> look like this:</p>
<pre class="literal-block">
// Include common definitions
diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html
index 34d201810d00..13a908e5a9d3 100644
--- a/docs/DeveloperPolicy.html
+++ b/docs/DeveloperPolicy.html
@@ -108,7 +108,7 @@
<li>Patches should be made with this command:
<div class="doc_code">
<pre>
-svn diff -x -u
+svn diff
</pre>
</div>
or with the utility <tt>utils/mkpatch</tt>, which makes it easy to read
@@ -592,7 +592,7 @@ Changes</a></div>
Written by the
<a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-04-05 14:38:44 +0200 (Sun, 05 Apr 2009) $
+ Last modified: $Date: 2009-06-15 04:18:54 +0000 (Mon, 15 Jun 2009) $
</address>
</body>
</html>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 897654df9569..f229150ea300 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -1106,8 +1106,9 @@ an <tt>ssp</tt> attribute, then the resulting function will have
an <tt>sspreq</tt> attribute.</dd>
<dt><tt>noredzone</tt></dt>
-<dd>This attribute indicates that the code generator should not enforce red zone
-mandated by target specific ABI.</dd>
+<dd>This attribute indicates that the code generator should not use a
+red zone, even if the target-specific ABI normally permits it.
+</dd>
<dt><tt>noimplicitfloat</tt></dt>
<dd>This attribute disables implicit floating point instructions.</dd>
@@ -3531,9 +3532,10 @@ address space (address space zero).</p>
bytes of memory from the operating system and returns a pointer of the
appropriate type to the program. If "NumElements" is specified, it is the
number of elements allocated, otherwise "NumElements" is defaulted to be one.
-If a constant alignment is specified, the value result of the allocation is guaranteed to
-be aligned to at least that boundary. If not specified, or if zero, the target can
-choose to align the allocation on any convenient boundary.</p>
+If a constant alignment is specified, the value result of the allocation is
+guaranteed to be aligned to at least that boundary. If not specified, or if
+zero, the target can choose to align the allocation on any convenient boundary
+compatible with the type.</p>
<p>'<tt>type</tt>' must be a sized type.</p>
@@ -3624,9 +3626,10 @@ space (address space zero).</p>
bytes of memory on the runtime stack, returning a pointer of the
appropriate type to the program. If "NumElements" is specified, it is the
number of elements allocated, otherwise "NumElements" is defaulted to be one.
-If a constant alignment is specified, the value result of the allocation is guaranteed
-to be aligned to at least that boundary. If not specified, or if zero, the target
-can choose to align the allocation on any convenient boundary.</p>
+If a constant alignment is specified, the value result of the allocation is
+guaranteed to be aligned to at least that boundary. If not specified, or if
+zero, the target can choose to align the allocation on any convenient boundary
+compatible with the type.</p>
<p>'<tt>type</tt>' may be any sized type.</p>
@@ -6428,9 +6431,6 @@ on any integer bit width.</p>
<h5>Overview:</h5>
-<p><i><b>Warning:</b> '<tt>llvm.umul.with.overflow</tt>' is badly broken. It is
-actively being fixed, but it should not currently be used!</i></p>
-
<p>The '<tt>llvm.umul.with.overflow</tt>' family of intrinsic functions perform
an unsigned multiplication of the two arguments, and indicate whether an overflow
occurred during the unsigned multiplication.</p>
@@ -7221,7 +7221,7 @@ declare void @llvm.stackprotector( i8* &lt;guard&gt;, i8** &lt;slot&gt; )
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-06-12 21:45:19 +0200 (Fri, 12 Jun 2009) $
+ Last modified: $Date: 2009-06-20 13:26:06 +0000 (Sat, 20 Jun 2009) $
</address>
</body>
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html
index a5e78efac807..39a04f7c84d1 100644
--- a/docs/MakefileGuide.html
+++ b/docs/MakefileGuide.html
@@ -232,17 +232,16 @@
Normally, the makefile system will build all the software into a single
<tt>libname.o</tt> (pre-linked) object. This means the library is not
searchable and that the distinction between compilation units has been
- dissolved. Optionally, you can ask for a shared library (.so), archive library
- (.a) or to not have the default (relinked) library built. For example:</p>
+  dissolved. Optionally, you can ask for a shared library (.so) or an archive
+  library (.a) to be built. Archive libraries are the default. For example:</p>
<pre><tt>
LIBRARYNAME = mylib
SHARED_LIBRARY = 1
ARCHIVE_LIBRARY = 1
- DONT_BUILD_RELINKED = 1
</tt></pre>
<p>says to build a library named "mylib" with both a shared library
- (<tt>mylib.so</tt>) and an archive library (<tt>mylib.a</tt>) version but
- not to build the relinked object (<tt>mylib.o</tt>). The contents of all the
+ (<tt>mylib.so</tt>) and an archive library (<tt>mylib.a</tt>) version. The
+ contents of all the
  libraries produced will be the same; they are just constructed differently.
Note that you normally do not need to specify the sources involved. The LLVM
Makefile system will infer the source files from the contents of the source
@@ -307,8 +306,6 @@
on.</li>
<li>The <a href="#LINK_LIBS_IN_SHARED">LINK_LIBS_IN_SHARED</a> variable
is turned on.</li>
- <li>The <a href="#DONT_BUILD_RELINKED">DONT_BUILD_RELINKED</a> variable
- is turned on.</li>
</ol>
<p>A loadable module is loaded by LLVM via the facilities of libtool's libltdl
library which is part of <tt>lib/System</tt> implementation.</p>
@@ -637,11 +634,6 @@
<dd>If set to any value, causes the makefiles to <b>not</b> automatically
generate dependencies when running the compiler. Use of this feature is
discouraged and it may be removed at a later date.</dd>
- <dt><a name="DONT_BUILD_RELINKED"><tt>DONT_BUILD_RELINKED</tt></a></dt>
- <dd>If set to any value, causes a relinked library (.o) not to be built. By
- default, libraries are built as re-linked since most LLVM libraries are
- needed in their entirety and re-linked libraries will be linked more quickly
- than equivalent archive libraries.</dd>
<dt><a name="ENABLE_OPTIMIZED"><tt>ENABLE_OPTIMIZED</tt></a></dt>
<dd>If set to any value, causes the build to generate optimized objects,
libraries and executables. This alters the flags specified to the compilers
@@ -960,7 +952,6 @@
DestArchiveLib
DestBitcodeLib
DestModule
- DestRelinkedLib
DestSharedLib
DestTool
DistAlways
@@ -1004,7 +995,6 @@
ProjUsedLibs
Ranlib
RecursiveTargets
- Relink
SrcMakefiles
Strip
StripWarnMsg
@@ -1026,7 +1016,7 @@
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-04-26 00:08:52 +0200 (Sun, 26 Apr 2009) $
+ Last modified: $Date: 2009-06-16 23:00:42 +0000 (Tue, 16 Jun 2009) $
</address>
</body>
</html>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index e7b2ad0b80fd..b45a60b7611d 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -2,6 +2,7 @@
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
+ <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
<title>LLVM Programmer's Manual</title>
<link rel="stylesheet" href="llvm.css" type="text/css">
</head>
@@ -129,6 +130,15 @@ with another <tt>Value</tt></a> </li>
</ul>
</li>
+ <li><a href="#threading">Threads and LLVM</a>
+ <ul>
+ <li><a href="#startmultithreaded">Entering and Exiting Multithreaded Mode
+ </a></li>
+ <li><a href="#shutdown">Ending execution with <tt>llvm_shutdown()</tt></a></li>
+ <li><a href="#managedstatic">Lazy initialization with <tt>ManagedStatic</tt></a></li>
+ </ul>
+ </li>
+
<li><a href="#advanced">Advanced Topics</a>
<ul>
<li><a href="#TypeResolve">LLVM Type Resolution</a>
@@ -176,8 +186,9 @@ with another <tt>Value</tt></a> </li>
<p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
<a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a>,
<a href="mailto:ggreif@gmail.com">Gabor Greif</a>,
- <a href="mailto:jstanley@cs.uiuc.edu">Joel Stanley</a> and
- <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
+ <a href="mailto:jstanley@cs.uiuc.edu">Joel Stanley</a>,
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a> and
+ <a href="mailto:owen@apple.com">Owen Anderson</a></p>
</div>
<!-- *********************************************************************** -->
@@ -2118,7 +2129,7 @@ FunctionType *ft = TypeBuilder&lt;types::i&lt;8&gt;(types::i&lt;32&gt;*), true&g
<div class="doc_code">
<pre>
-std::vector<const Type*> params;
+std::vector&lt;const Type*&gt; params;
params.push_back(PointerType::getUnqual(Type::Int32Ty));
FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
</pre>
@@ -2131,6 +2142,123 @@ comment</a> for more details.</p>
<!-- *********************************************************************** -->
<div class="doc_section">
+ <a name="threading">Threads and LLVM</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>
+This section describes how the LLVM APIs interact with multithreading, both on
+the part of client applications and on the part of applications hosted in the
+JIT.
+</p>
+
+<p>
+Note that LLVM's support for multithreading is still relatively young. Up
+through version 2.5, the execution of threaded hosted applications was
+supported, but not threaded client access to the APIs. While this use case is
+now supported, clients <em>must</em> adhere to the guidelines specified below to
+ensure proper operation in multithreaded mode.
+</p>
+
+<p>
+Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic
+intrinsics in order to support threaded operation. If you need a
+multithreading-capable LLVM on a platform without a suitably modern system
+compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and
+using the resultant compiler to build a copy of LLVM with multithreading
+support.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="startmultithreaded">Entering and Exiting Multithreaded Mode</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+In order to properly protect its internal data structures while avoiding
+excessive locking overhead in the single-threaded case, LLVM must initialize
+the data structures that provide guards around its internals. To do
+so, the client program must invoke <tt>llvm_start_multithreaded()</tt> before
+making any concurrent LLVM API calls. To subsequently tear down these
+structures, use the <tt>llvm_stop_multithreaded()</tt> call. You can also use
+the <tt>llvm_is_multithreaded()</tt> call to check the status of multithreaded
+mode.
+</p>
+
+<p>
+Note that both of these calls must be made <em>in isolation</em>. That is to
+say that no other LLVM API calls may be executing at any time during the
+execution of <tt>llvm_start_multithreaded()</tt> or <tt>llvm_stop_multithreaded
+</tt>. It is the client's responsibility to enforce this isolation.
+</p>
+
+<p>
+The return value of <tt>llvm_start_multithreaded()</tt> indicates the success or
+failure of the initialization. Failure typically indicates that your copy of
+LLVM was built without multithreading support, typically because GCC atomic
+intrinsics were not found in your system compiler. In this case, the LLVM API
+will not be safe for concurrent calls. However, it <em>will</em> be safe for
+hosting threaded applications in the JIT, though care must be taken to ensure
+that side exits and the like do not accidentally result in concurrent LLVM API
+calls.
+</p>
+</div>
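
    A minimal sketch of the calling pattern described above, not part of the
    patch itself (the worker threads are elided; llvm_start_multithreaded and
    llvm_stop_multithreaded are declared in llvm/System/Threading.h, which this
    commit adds):

        #include "llvm/System/Threading.h"

        int main() {
          // Must run in isolation, before any concurrent LLVM API calls.
          if (!llvm::llvm_start_multithreaded())
            return 1;  // Built without multithreading support.

          // ... spawn worker threads that call the LLVM APIs, then join them ...

          // Also must run in isolation, after all workers have finished.
          llvm::llvm_stop_multithreaded();
          return 0;
        }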
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="shutdown">Ending Execution with <tt>llvm_shutdown()</tt></a>
+</div>
+
+<div class="doc_text">
+<p>
+When you are done using the LLVM APIs, you should call <tt>llvm_shutdown()</tt>
+to deallocate memory used for internal structures. This will also invoke
+<tt>llvm_stop_multithreaded()</tt> if LLVM is operating in multithreaded mode.
+As such, <tt>llvm_shutdown()</tt> requires the same isolation guarantees as
+<tt>llvm_stop_multithreaded()</tt>.
+</p>
+
+<p>
+Note that, if you use scope-based shutdown, you can use the
+<tt>llvm_shutdown_obj</tt> class, which calls <tt>llvm_shutdown()</tt> in its
+destructor.
+</p>
+</div>
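
    A minimal sketch of the scope-based variant, assuming only the header that
    declares llvm_shutdown_obj:

        #include "llvm/Support/ManagedStatic.h"

        int main() {
          llvm::llvm_shutdown_obj Y;  // Calls llvm_shutdown() in its destructor.
          // ... use the LLVM APIs ...
          return 0;
        }  // llvm_shutdown() runs here, as Y goes out of scope.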
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="managedstatic">Lazy Initialization with <tt>ManagedStatic</tt></a>
+</div>
+
+<div class="doc_text">
+<p>
+<tt>ManagedStatic</tt> is a utility class in LLVM used to implement static
+initialization of static resources, such as the global type tables. Before the
+invocation of <tt>llvm_shutdown()</tt>, it implements a simple lazy
+initialization scheme. Once <tt>llvm_start_multithreaded()</tt> returns,
+however, it uses double-checked locking to implement thread-safe lazy
+initialization.
+</p>
+
+<p>
+Note that, because no other threads are allowed to issue LLVM API calls before
+<tt>llvm_start_multithreaded()</tt> returns, it is possible to have
+<tt>ManagedStatic</tt>s of <tt>llvm::sys::Mutex</tt>s.
+</p>
+
+<p>
+The <tt>llvm_acquire_global_lock()</tt> and <tt>llvm_release_global_lock()</tt>
+APIs provide access to the global lock used to implement the double-checked
+locking for lazy initialization. These should only be used internally to LLVM,
+and only if you know what you're doing!
+</p>
+</div>
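
    To make the scheme concrete, a small sketch; the mutex and function names
    are illustrative, not from the patch:

        #include "llvm/Support/ManagedStatic.h"
        #include "llvm/System/Mutex.h"

        // Constructed lazily on first use; once llvm_start_multithreaded() has
        // returned, construction is guarded by double-checked locking.
        static llvm::ManagedStatic<llvm::sys::Mutex> MyLock;

        void doGuardedWork() {
          MyLock->acquire();
          // ... touch the state this mutex protects ...
          MyLock->release();
        }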
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
<a name="advanced">Advanced Topics</a>
</div>
<!-- *********************************************************************** -->
@@ -3430,7 +3558,7 @@ never change at runtime).</p>
<p><tt>#include "<a
href="/doxygen/BasicBlock_8h-source.html">llvm/BasicBlock.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/structllvm_1_1BasicBlock.html">BasicBlock
+doxygen info: <a href="/doxygen/classllvm_1_1BasicBlock.html">BasicBlock
Class</a><br>
Superclass: <a href="#Value"><tt>Value</tt></a></p>
@@ -3536,7 +3664,7 @@ arguments. An argument has a pointer to the parent Function.</p>
<a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-05-01 22:40:51 +0200 (Fri, 01 May 2009) $
+ Last modified: $Date: 2009-06-17 21:12:26 +0000 (Wed, 17 Jun 2009) $
</address>
</body>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index a039f619c9da..b1b2c7874ed2 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -1537,7 +1537,7 @@ need some way to free analysis results when they are no longer useful. The
<p>If you are writing an analysis or any other pass that retains a significant
amount of state (for use by another pass which "requires" your pass and uses the
<a href="#getAnalysis">getAnalysis</a> method) you should implement
-<tt>releaseMEmory</tt> to, well, release the memory allocated to maintain this
+<tt>releaseMemory</tt> to, well, release the memory allocated to maintain this
internal state. This method is called after the <tt>run*</tt> method for the
class, before the next call of <tt>run*</tt> in your pass.</p>
@@ -1821,7 +1821,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2009-02-18 06:09:16 +0100 (Wed, 18 Feb 2009) $
+ Last modified: $Date: 2009-06-15 18:22:49 +0000 (Mon, 15 Jun 2009) $
</address>
</body>
diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp
index 34fb80617722..06e77d2e2a3e 100644
--- a/examples/BrainF/BrainFDriver.cpp
+++ b/examples/BrainF/BrainFDriver.cpp
@@ -34,6 +34,7 @@
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Target/TargetSelect.h"
#include <fstream>
#include <iostream>
using namespace llvm;
@@ -135,6 +136,8 @@ int main(int argc, char **argv) {
//Write it out
if (JIT) {
+ InitializeNativeTarget();
+
std::cout << "------- Running JIT -------\n";
ExistingModuleProvider *mp = new ExistingModuleProvider(mod);
ExecutionEngine *ee = ExecutionEngine::create(mp, false);
diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp
index b5c6d111914f..a9f10009e1ec 100644
--- a/examples/HowToUseJIT/HowToUseJIT.cpp
+++ b/examples/HowToUseJIT/HowToUseJIT.cpp
@@ -42,11 +42,15 @@
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/ExecutionEngine/Interpreter.h"
#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Target/TargetSelect.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
int main() {
+
+ InitializeNativeTarget();
+
// Create some module to put our function into it.
Module *M = new Module("test");
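
The change this and the neighboring examples share follows one pattern; a
hedged standalone sketch, with module contents elided:

    #include "llvm/Module.h"
    #include "llvm/ModuleProvider.h"
    #include "llvm/ExecutionEngine/JIT.h"
    #include "llvm/Target/TargetSelect.h"
    using namespace llvm;

    int main() {
      // Without this call the JIT cannot select the host target, so
      // ExecutionEngine::create() has nothing to generate code for.
      InitializeNativeTarget();

      Module *M = new Module("test");
      ExistingModuleProvider *MP = new ExistingModuleProvider(M);
      ExecutionEngine *EE = ExecutionEngine::create(MP, false);
      // ... add functions to M and run them through EE ...
      return 0;
    }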
diff --git a/examples/Kaleidoscope/toy.cpp b/examples/Kaleidoscope/toy.cpp
index bec430c41f5a..c75014a69ba4 100644
--- a/examples/Kaleidoscope/toy.cpp
+++ b/examples/Kaleidoscope/toy.cpp
@@ -5,6 +5,7 @@
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/IRBuilder.h"
#include <cstdio>
@@ -1081,6 +1082,8 @@ double printd(double X) {
//===----------------------------------------------------------------------===//
int main() {
+ InitializeNativeTarget();
+
// Install standard binary operators.
// 1 is lowest precedence.
BinopPrecedence['='] = 2;
diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp
index e812d84eafc8..a6d7dcf7b556 100644
--- a/examples/ParallelJIT/ParallelJIT.cpp
+++ b/examples/ParallelJIT/ParallelJIT.cpp
@@ -26,6 +26,7 @@
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/ExecutionEngine/Interpreter.h"
#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Target/TargetSelect.h"
#include <iostream>
using namespace llvm;
@@ -229,8 +230,9 @@ void* callFunc( void* param )
return (void*)(intptr_t)gv.IntVal.getZExtValue();
}
-int main()
-{
+int main() {
+ InitializeNativeTarget();
+
// Create some module to put our function into it.
Module *M = new Module("test");
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index bf7ce9d0bb6a..b477d0a8f0f5 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include <set>
#include <stack>
#include <vector>
@@ -39,9 +40,9 @@ public:
};
template<class GraphT,
- class SetType = std::set<typename GraphTraits<GraphT>::NodeType*>,
- bool ExtStorage = false,
- class GT = GraphTraits<GraphT> >
+ class SetType = llvm::SmallPtrSet<typename GraphTraits<GraphT>::NodeType*, 8>,
+ bool ExtStorage = false,
+ class GT = GraphTraits<GraphT> >
class po_iterator : public forward_iterator<typename GT::NodeType, ptrdiff_t>,
public po_iterator_storage<SetType, ExtStorage> {
typedef forward_iterator<typename GT::NodeType, ptrdiff_t> super;
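
For reference, assumed typical usage rather than code from the patch: a
post-order walk over a function's CFG now picks up the SmallPtrSet default
shown above automatically:

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/Function.h"
    #include "llvm/Support/CFG.h"  // GraphTraits for BasicBlock graphs.
    using namespace llvm;

    void walkPostOrder(Function &F) {
      BasicBlock *Entry = &F.getEntryBlock();
      // The visited set defaults to SmallPtrSet<BasicBlock*, 8>.
      for (po_iterator<BasicBlock*> I = po_begin(Entry), E = po_end(Entry);
           I != E; ++I) {
        BasicBlock *BB = *I;
        (void)BB;  // ... process BB ...
      }
    }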
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index b260f984948e..b3f742e0a59b 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -49,6 +49,7 @@ public:
enum OSType {
UnknownOS,
+ AuroraUX,
Darwin,
DragonFly,
FreeBSD,
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 36ff07b678e6..ac785d5c54a9 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -35,9 +35,9 @@ class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
public:
IVStrideUse(IVUsersOfOneStride *parent,
const SCEVHandle &offset,
- Instruction* U, Value *O, bool issigned)
+ Instruction* U, Value *O)
: CallbackVH(U), Parent(parent), Offset(offset),
- OperandValToReplace(O), IsSigned(issigned),
+ OperandValToReplace(O),
IsUseOfPostIncrementedValue(false) {
}
@@ -57,8 +57,7 @@ public:
   /// getOffset - Return the offset to add to a theoretical induction
/// variable that starts at zero and counts up by the stride to compute
- /// the value for the use. This always has the same type as the stride,
- /// which may need to be casted to match the type of the use.
+ /// the value for the use. This always has the same type as the stride.
SCEVHandle getOffset() const { return Offset; }
/// setOffset - Assign a new offset to this use.
@@ -78,13 +77,6 @@ public:
OperandValToReplace = Op;
}
- /// isSigned - The stride (and thus also the Offset) of this use may be in
- /// a narrower type than the use itself (OperandValToReplace->getType()).
- /// When this is the case, isSigned() indicates whether the IV expression
- /// should be signed-extended instead of zero-extended to fit the type of
- /// the use.
- bool isSigned() const { return IsSigned; }
-
/// isUseOfPostIncrementedValue - True if this should use the
/// post-incremented version of this IV, not the preincremented version.
/// This can only be set in special cases, such as the terminating setcc
@@ -110,10 +102,6 @@ private:
/// that this IVStrideUse is representing.
WeakVH OperandValToReplace;
- /// IsSigned - Determines whether the replacement value is sign or
- /// zero extended to the type of the use.
- bool IsSigned;
-
/// IsUseOfPostIncrementedValue - True if this should use the
/// post-incremented version of this IV, not the preincremented version.
bool IsUseOfPostIncrementedValue;
@@ -170,9 +158,8 @@ public:
/// initial value and the operand that uses the IV.
ilist<IVStrideUse> Users;
- void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand,
- bool isSigned) {
- Users.push_back(new IVStrideUse(this, Offset, User, Operand, isSigned));
+ void addUser(const SCEVHandle &Offset, Instruction *User, Value *Operand) {
+ Users.push_back(new IVStrideUse(this, Offset, User, Operand));
}
};
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 41725be1ca3e..8d5136cea2c3 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -25,6 +25,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
#include <iosfwd>
namespace llvm {
@@ -34,6 +35,7 @@ namespace llvm {
class SCEVHandle;
class ScalarEvolution;
class TargetData;
+ template<> struct DenseMapInfo<SCEVHandle>;
/// SCEV - This class represents an analyzed expression in the program. These
/// are reference-counted opaque objects that the client is not allowed to
@@ -44,18 +46,22 @@ namespace llvm {
mutable unsigned RefCount;
friend class SCEVHandle;
+ friend class DenseMapInfo<SCEVHandle>;
void addRef() const { ++RefCount; }
void dropRef() const {
if (--RefCount == 0)
delete this;
}
+ const ScalarEvolution* parent;
+
SCEV(const SCEV &); // DO NOT IMPLEMENT
void operator=(const SCEV &); // DO NOT IMPLEMENT
protected:
virtual ~SCEV();
public:
- explicit SCEV(unsigned SCEVTy) : SCEVType(SCEVTy), RefCount(0) {}
+ explicit SCEV(unsigned SCEVTy, const ScalarEvolution* p) :
+ SCEVType(SCEVTy), RefCount(0), parent(p) {}
unsigned getSCEVType() const { return SCEVType; }
@@ -123,7 +129,7 @@ namespace llvm {
/// None of the standard SCEV operations are valid on this class, it is just a
/// marker.
struct SCEVCouldNotCompute : public SCEV {
- SCEVCouldNotCompute();
+ SCEVCouldNotCompute(const ScalarEvolution* p);
~SCEVCouldNotCompute();
// None of these methods are valid for this object.
@@ -197,6 +203,31 @@ namespace llvm {
template<> struct simplify_type<SCEVHandle>
: public simplify_type<const SCEVHandle> {};
+ // Specialize DenseMapInfo for SCEVHandle so that SCEVHandle may be used
+ // as a key in DenseMaps.
+ template<>
+ struct DenseMapInfo<SCEVHandle> {
+ static inline SCEVHandle getEmptyKey() {
+ static SCEVCouldNotCompute Empty(0);
+ if (Empty.RefCount == 0)
+ Empty.addRef();
+ return &Empty;
+ }
+ static inline SCEVHandle getTombstoneKey() {
+ static SCEVCouldNotCompute Tombstone(0);
+ if (Tombstone.RefCount == 0)
+ Tombstone.addRef();
+ return &Tombstone;
+ }
+ static unsigned getHashValue(const SCEVHandle &Val) {
+ return DenseMapInfo<const SCEV *>::getHashValue(Val);
+ }
+ static bool isEqual(const SCEVHandle &LHS, const SCEVHandle &RHS) {
+ return LHS == RHS;
+ }
+ static bool isPod() { return false; }
+ };
+
/// ScalarEvolution - This class is the main scalar evolution driver. Because
/// client code (intentionally) can't do much with the SCEV objects directly,
/// they must ask this class for services.
@@ -301,6 +332,13 @@ namespace llvm {
const SCEVHandle &SymName,
const SCEVHandle &NewVal);
+ /// getBECount - Subtract the end and start values and divide by the step,
+ /// rounding up, to get the number of times the backedge is executed. Return
+ /// CouldNotCompute if an intermediate computation overflows.
+ SCEVHandle getBECount(const SCEVHandle &Start,
+ const SCEVHandle &End,
+ const SCEVHandle &Step);
+
/// getBackedgeTakenInfo - Return the BackedgeTakenInfo for the given
/// loop, lazily computing new values if the loop hasn't been analyzed
/// yet.
@@ -310,6 +348,31 @@ namespace llvm {
/// loop will iterate.
BackedgeTakenInfo ComputeBackedgeTakenCount(const Loop *L);
+ /// ComputeBackedgeTakenCountFromExit - Compute the number of times the
+ /// backedge of the specified loop will execute if it exits via the
+ /// specified block.
+ BackedgeTakenInfo ComputeBackedgeTakenCountFromExit(const Loop *L,
+ BasicBlock *ExitingBlock);
+
+ /// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+ /// backedge of the specified loop will execute if its exit condition
+ /// were a conditional branch of ExitCond, TBB, and FBB.
+ BackedgeTakenInfo
+ ComputeBackedgeTakenCountFromExitCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB);
+
+ /// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of
+ /// times the backedge of the specified loop will execute if its exit
+ /// condition were a conditional branch of the ICmpInst ExitCond, TBB,
+ /// and FBB.
+ BackedgeTakenInfo
+ ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB);
+
/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition
/// of 'icmp op load X, cst', try to see if we can compute the trip count.
SCEVHandle
@@ -390,28 +453,29 @@ namespace llvm {
SCEVHandle getConstant(ConstantInt *V);
SCEVHandle getConstant(const APInt& Val);
+ SCEVHandle getConstant(const Type *Ty, uint64_t V, bool isSigned = false);
SCEVHandle getTruncateExpr(const SCEVHandle &Op, const Type *Ty);
SCEVHandle getZeroExtendExpr(const SCEVHandle &Op, const Type *Ty);
SCEVHandle getSignExtendExpr(const SCEVHandle &Op, const Type *Ty);
SCEVHandle getAnyExtendExpr(const SCEVHandle &Op, const Type *Ty);
- SCEVHandle getAddExpr(std::vector<SCEVHandle> &Ops);
+ SCEVHandle getAddExpr(SmallVectorImpl<SCEVHandle> &Ops);
SCEVHandle getAddExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) {
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getAddExpr(Ops);
}
SCEVHandle getAddExpr(const SCEVHandle &Op0, const SCEVHandle &Op1,
const SCEVHandle &Op2) {
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 3> Ops;
Ops.push_back(Op0);
Ops.push_back(Op1);
Ops.push_back(Op2);
return getAddExpr(Ops);
}
- SCEVHandle getMulExpr(std::vector<SCEVHandle> &Ops);
+ SCEVHandle getMulExpr(SmallVectorImpl<SCEVHandle> &Ops);
SCEVHandle getMulExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) {
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getMulExpr(Ops);
@@ -419,17 +483,19 @@ namespace llvm {
SCEVHandle getUDivExpr(const SCEVHandle &LHS, const SCEVHandle &RHS);
SCEVHandle getAddRecExpr(const SCEVHandle &Start, const SCEVHandle &Step,
const Loop *L);
- SCEVHandle getAddRecExpr(std::vector<SCEVHandle> &Operands,
+ SCEVHandle getAddRecExpr(SmallVectorImpl<SCEVHandle> &Operands,
const Loop *L);
- SCEVHandle getAddRecExpr(const std::vector<SCEVHandle> &Operands,
+ SCEVHandle getAddRecExpr(const SmallVectorImpl<SCEVHandle> &Operands,
const Loop *L) {
- std::vector<SCEVHandle> NewOp(Operands);
+ SmallVector<SCEVHandle, 4> NewOp(Operands.begin(), Operands.end());
return getAddRecExpr(NewOp, L);
}
SCEVHandle getSMaxExpr(const SCEVHandle &LHS, const SCEVHandle &RHS);
- SCEVHandle getSMaxExpr(std::vector<SCEVHandle> Operands);
+ SCEVHandle getSMaxExpr(SmallVectorImpl<SCEVHandle> &Operands);
SCEVHandle getUMaxExpr(const SCEVHandle &LHS, const SCEVHandle &RHS);
- SCEVHandle getUMaxExpr(std::vector<SCEVHandle> Operands);
+ SCEVHandle getUMaxExpr(SmallVectorImpl<SCEVHandle> &Operands);
+ SCEVHandle getSMinExpr(const SCEVHandle &LHS, const SCEVHandle &RHS);
+ SCEVHandle getUMinExpr(const SCEVHandle &LHS, const SCEVHandle &RHS);
SCEVHandle getUnknown(Value *V);
SCEVHandle getCouldNotCompute();
@@ -481,6 +547,12 @@ namespace llvm {
/// specified signed integer value and return a SCEV for the constant.
SCEVHandle getIntegerSCEV(int Val, const Type *Ty);
+ /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+ /// the types using zero-extension, and then perform a umax operation
+ /// with them.
+ SCEVHandle getUMaxFromMismatchedTypes(const SCEVHandle &LHS,
+ const SCEVHandle &RHS);
+
/// hasSCEV - Return true if the SCEV for this value has already been
/// computed.
bool hasSCEV(Value *V) const;
@@ -539,6 +611,20 @@ namespace llvm {
/// is deleted.
void forgetLoopBackedgeTakenCount(const Loop *L);
+ /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+ /// guaranteed to end in (at every loop iteration). It is, at the same time,
+ /// the minimum number of times S is divisible by 2. For example, given {4,+,8}
+ /// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
+ uint32_t GetMinTrailingZeros(const SCEVHandle &S);
+
+ /// GetMinLeadingZeros - Determine the minimum number of zero bits that S is
+ /// guaranteed to begin with (at every loop iteration).
+ uint32_t GetMinLeadingZeros(const SCEVHandle &S);
+
+ /// GetMinSignBits - Determine the minimum number of sign bits that S is
+ /// guaranteed to begin with.
+ uint32_t GetMinSignBits(const SCEVHandle &S);
+
virtual bool runOnFunction(Function &F);
virtual void releaseMemory();
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
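
A sketch of what the new DenseMapInfo<SCEVHandle> specialization permits; the
caching helper is hypothetical:

    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/ADT/DenseMap.h"
    using namespace llvm;

    // SCEVHandle can now key a DenseMap directly, e.g. to memoize a
    // per-expression property such as the new GetMinTrailingZeros() result.
    static uint32_t cachedTrailingZeros(ScalarEvolution &SE, Value *V,
                                        DenseMap<SCEVHandle, uint32_t> &Cache) {
      SCEVHandle S = SE.getSCEV(V);
      DenseMap<SCEVHandle, uint32_t>::iterator I = Cache.find(S);
      if (I != Cache.end())
        return I->second;
      uint32_t TZ = SE.GetMinTrailingZeros(S);
      Cache[S] = TZ;
      return TZ;
    }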
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 1978055b1af6..28423569d2e3 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -36,7 +36,8 @@ namespace llvm {
friend class ScalarEvolution;
ConstantInt *V;
- explicit SCEVConstant(ConstantInt *v) : SCEV(scConstant), V(v) {}
+ explicit SCEVConstant(ConstantInt *v, const ScalarEvolution* p) :
+ SCEV(scConstant, p), V(v) {}
virtual ~SCEVConstant();
public:
@@ -79,7 +80,8 @@ namespace llvm {
SCEVHandle Op;
const Type *Ty;
- SCEVCastExpr(unsigned SCEVTy, const SCEVHandle &op, const Type *ty);
+ SCEVCastExpr(unsigned SCEVTy, const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p);
virtual ~SCEVCastExpr();
public:
@@ -112,7 +114,8 @@ namespace llvm {
class SCEVTruncateExpr : public SCEVCastExpr {
friend class ScalarEvolution;
- SCEVTruncateExpr(const SCEVHandle &op, const Type *ty);
+ SCEVTruncateExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p);
virtual ~SCEVTruncateExpr();
public:
@@ -141,7 +144,8 @@ namespace llvm {
class SCEVZeroExtendExpr : public SCEVCastExpr {
friend class ScalarEvolution;
- SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty);
+ SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p);
virtual ~SCEVZeroExtendExpr();
public:
@@ -170,7 +174,8 @@ namespace llvm {
class SCEVSignExtendExpr : public SCEVCastExpr {
friend class ScalarEvolution;
- SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty);
+ SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p);
virtual ~SCEVSignExtendExpr();
public:
@@ -199,10 +204,11 @@ namespace llvm {
///
class SCEVNAryExpr : public SCEV {
protected:
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 8> Operands;
- SCEVNAryExpr(enum SCEVTypes T, const std::vector<SCEVHandle> &ops)
- : SCEV(T), Operands(ops) {}
+ SCEVNAryExpr(enum SCEVTypes T, const SmallVectorImpl<SCEVHandle> &ops,
+ const ScalarEvolution* p)
+ : SCEV(T, p), Operands(ops.begin(), ops.end()) {}
virtual ~SCEVNAryExpr() {}
public:
@@ -212,8 +218,8 @@ namespace llvm {
return Operands[i];
}
- const std::vector<SCEVHandle> &getOperands() const { return Operands; }
- typedef std::vector<SCEVHandle>::const_iterator op_iterator;
+ const SmallVectorImpl<SCEVHandle> &getOperands() const { return Operands; }
+ typedef SmallVectorImpl<SCEVHandle>::const_iterator op_iterator;
op_iterator op_begin() const { return Operands.begin(); }
op_iterator op_end() const { return Operands.end(); }
@@ -259,8 +265,10 @@ namespace llvm {
///
class SCEVCommutativeExpr : public SCEVNAryExpr {
protected:
- SCEVCommutativeExpr(enum SCEVTypes T, const std::vector<SCEVHandle> &ops)
- : SCEVNAryExpr(T, ops) {}
+ SCEVCommutativeExpr(enum SCEVTypes T,
+ const SmallVectorImpl<SCEVHandle> &ops,
+ const ScalarEvolution* p)
+ : SCEVNAryExpr(T, ops, p) {}
~SCEVCommutativeExpr();
public:
@@ -289,8 +297,9 @@ namespace llvm {
class SCEVAddExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- explicit SCEVAddExpr(const std::vector<SCEVHandle> &ops)
- : SCEVCommutativeExpr(scAddExpr, ops) {
+ explicit SCEVAddExpr(const SmallVectorImpl<SCEVHandle> &ops,
+ const ScalarEvolution* p)
+ : SCEVCommutativeExpr(scAddExpr, ops, p) {
}
public:
@@ -309,8 +318,9 @@ namespace llvm {
class SCEVMulExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- explicit SCEVMulExpr(const std::vector<SCEVHandle> &ops)
- : SCEVCommutativeExpr(scMulExpr, ops) {
+ explicit SCEVMulExpr(const SmallVectorImpl<SCEVHandle> &ops,
+ const ScalarEvolution* p)
+ : SCEVCommutativeExpr(scMulExpr, ops, p) {
}
public:
@@ -331,8 +341,9 @@ namespace llvm {
friend class ScalarEvolution;
SCEVHandle LHS, RHS;
- SCEVUDivExpr(const SCEVHandle &lhs, const SCEVHandle &rhs)
- : SCEV(scUDivExpr), LHS(lhs), RHS(rhs) {}
+ SCEVUDivExpr(const SCEVHandle &lhs, const SCEVHandle &rhs,
+ const ScalarEvolution* p)
+ : SCEV(scUDivExpr, p), LHS(lhs), RHS(rhs) {}
virtual ~SCEVUDivExpr();
public:
@@ -387,8 +398,9 @@ namespace llvm {
const Loop *L;
- SCEVAddRecExpr(const std::vector<SCEVHandle> &ops, const Loop *l)
- : SCEVNAryExpr(scAddRecExpr, ops), L(l) {
+ SCEVAddRecExpr(const SmallVectorImpl<SCEVHandle> &ops, const Loop *l,
+ const ScalarEvolution* p)
+ : SCEVNAryExpr(scAddRecExpr, ops, p), L(l) {
for (size_t i = 0, e = Operands.size(); i != e; ++i)
assert(Operands[i]->isLoopInvariant(l) &&
"Operands of AddRec must be loop-invariant!");
@@ -404,7 +416,7 @@ namespace llvm {
/// of degree N, it returns a chrec of degree N-1.
SCEVHandle getStepRecurrence(ScalarEvolution &SE) const {
if (isAffine()) return getOperand(1);
- return SE.getAddRecExpr(std::vector<SCEVHandle>(op_begin()+1,op_end()),
+ return SE.getAddRecExpr(SmallVector<SCEVHandle, 3>(op_begin()+1,op_end()),
getLoop());
}
@@ -463,8 +475,9 @@ namespace llvm {
class SCEVSMaxExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- explicit SCEVSMaxExpr(const std::vector<SCEVHandle> &ops)
- : SCEVCommutativeExpr(scSMaxExpr, ops) {
+ explicit SCEVSMaxExpr(const SmallVectorImpl<SCEVHandle> &ops,
+ const ScalarEvolution* p)
+ : SCEVCommutativeExpr(scSMaxExpr, ops, p) {
}
public:
@@ -484,8 +497,9 @@ namespace llvm {
class SCEVUMaxExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- explicit SCEVUMaxExpr(const std::vector<SCEVHandle> &ops)
- : SCEVCommutativeExpr(scUMaxExpr, ops) {
+ explicit SCEVUMaxExpr(const SmallVectorImpl<SCEVHandle> &ops,
+ const ScalarEvolution* p)
+ : SCEVCommutativeExpr(scUMaxExpr, ops, p) {
}
public:
@@ -508,7 +522,8 @@ namespace llvm {
friend class ScalarEvolution;
Value *V;
- explicit SCEVUnknown(Value *v) : SCEV(scUnknown), V(v) {}
+ explicit SCEVUnknown(Value *v, const ScalarEvolution* p) :
+ SCEV(scUnknown, p), V(v) {}
protected:
~SCEVUnknown();
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index 072f7c386302..318ea287510e 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -57,7 +57,18 @@ namespace CallingConv {
/// X86_FastCall - 'fast' analog of X86_StdCall. Passes first two arguments
/// in ECX:EDX registers, others - via stack. Callee is responsible for
/// stack cleaning.
- X86_FastCall = 65
+ X86_FastCall = 65,
+
+ /// ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete,
+ /// but still used on some targets).
+ ARM_APCS = 66,
+
+ /// ARM_AAPCS - ARM Architecture Procedure Calling Standard calling
+ /// convention (aka EABI). Soft float variant.
+ ARM_AAPCS = 67,
+
+ /// ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
+ ARM_AAPCS_VFP = 68
};
} // End CallingConv namespace
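
By way of illustration (assumed usage, not from the patch), the new values are
applied like any other member of the enum:

    #include "llvm/Function.h"
    #include "llvm/CallingConv.h"
    using namespace llvm;

    void useVFPABI(Function *F) {
      // Request the AAPCS variant that passes floating point in VFP registers.
      F->setCallingConv(CallingConv::ARM_AAPCS_VFP);
    }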
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 38c1710d7103..c7b1a42d06b6 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/DebugLoc.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
namespace llvm {
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index f1ae587ac195..0cb7e9004387 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -29,29 +29,112 @@
namespace llvm {
class MachineInstr;
+ class MachineRegisterInfo;
class TargetRegisterInfo;
struct LiveInterval;
- /// VNInfo - If the value number definition is undefined (e.g. phi
- /// merge point), it contains ~0u,x. If the value number is not in use, it
- /// contains ~1u,x to indicate that the value # is not used.
- /// def - Instruction # of the definition.
- /// - or reg # of the definition if it's a stack slot liveinterval.
- /// copy - Copy iff val# is defined by a copy; zero otherwise.
- /// hasPHIKill - One or more of the kills are PHI nodes.
- /// redefByEC - Re-defined by early clobber somewhere during the live range.
- /// kills - Instruction # of the kills.
- struct VNInfo {
+ /// VNInfo - Value Number Information.
+  /// This class holds information about a machine level value, including
+ /// definition and use points.
+ ///
+ /// Care must be taken in interpreting the def index of the value. The
+ /// following rules apply:
+ ///
+  /// If the isDefAccurate() method returns false then def does not contain the
+  /// index of the defining MachineInstr, or even (necessarily) refer to a
+  /// MachineInstr at all. In general such a def index is not meaningful
+ /// and should not be used. The exception is that, for values originally
+ /// defined by PHI instructions, after PHI elimination def will contain the
+ /// index of the MBB in which the PHI originally existed. This can be used
+ /// to insert code (spills or copies) which deals with the value, which will
+ /// be live in to the block.
+
+ class VNInfo {
+ private:
+ enum {
+ HAS_PHI_KILL = 1,
+ REDEF_BY_EC = 1 << 1,
+ IS_PHI_DEF = 1 << 2,
+ IS_UNUSED = 1 << 3,
+ IS_DEF_ACCURATE = 1 << 4
+ };
+
+ unsigned char flags;
+
+ public:
+ /// The ID number of this value.
unsigned id;
+
+ /// The index of the defining instruction (if isDefAccurate() returns true).
unsigned def;
MachineInstr *copy;
- bool hasPHIKill : 1;
- bool redefByEC : 1;
SmallVector<unsigned, 4> kills;
+
VNInfo()
- : id(~1U), def(~1U), copy(0), hasPHIKill(false), redefByEC(false) {}
+ : flags(IS_UNUSED), id(~1U), def(0), copy(0) {}
+
+ /// VNInfo constructor.
+  /// d is presumed to be the index of the actual defining instr. If it is not,
+  /// setIsDefAccurate(false) should be called after construction.
VNInfo(unsigned i, unsigned d, MachineInstr *c)
- : id(i), def(d), copy(c), hasPHIKill(false), redefByEC(false) {}
+ : flags(IS_DEF_ACCURATE), id(i), def(d), copy(c) {}
+
+  /// VNInfo constructor; copies values from orig, except for the value number.
+ VNInfo(unsigned i, const VNInfo &orig)
+ : flags(orig.flags), id(i), def(orig.def), copy(orig.copy),
+ kills(orig.kills) {}
+
+ /// Used for copying value number info.
+ unsigned getFlags() const { return flags; }
+ void setFlags(unsigned flags) { this->flags = flags; }
+
+ /// Returns true if one or more kills are PHI nodes.
+ bool hasPHIKill() const { return flags & HAS_PHI_KILL; }
+ void setHasPHIKill(bool hasKill) {
+ if (hasKill)
+ flags |= HAS_PHI_KILL;
+ else
+ flags &= ~HAS_PHI_KILL;
+ }
+
+ /// Returns true if this value is re-defined by an early clobber somewhere
+ /// during the live range.
+ bool hasRedefByEC() const { return flags & REDEF_BY_EC; }
+ void setHasRedefByEC(bool hasRedef) {
+ if (hasRedef)
+ flags |= REDEF_BY_EC;
+ else
+ flags &= ~REDEF_BY_EC;
+ }
+
+    /// Returns true if this value is defined by a PHI instruction (or was;
+    /// PHI instructions may have been eliminated).
+ bool isPHIDef() const { return flags & IS_PHI_DEF; }
+ void setIsPHIDef(bool phiDef) {
+ if (phiDef)
+ flags |= IS_PHI_DEF;
+ else
+ flags &= ~IS_PHI_DEF;
+ }
+
+ /// Returns true if this value is unused.
+ bool isUnused() const { return flags & IS_UNUSED; }
+ void setIsUnused(bool unused) {
+ if (unused)
+ flags |= IS_UNUSED;
+ else
+ flags &= ~IS_UNUSED;
+ }
+
+ /// Returns true if the def is accurate.
+ bool isDefAccurate() const { return flags & IS_DEF_ACCURATE; }
+ void setIsDefAccurate(bool defAccurate) {
+ if (defAccurate)
+ flags |= IS_DEF_ACCURATE;
+ else
+ flags &= ~IS_DEF_ACCURATE;
+ }
+
};
/// LiveRange structure - This represents a simple register range in the
@@ -108,7 +191,6 @@ namespace llvm {
unsigned reg; // the register or stack slot of this interval
// if the top bits is set, it represents a stack slot.
float weight; // weight of this interval
- unsigned short preference; // preferred register for this interval
Ranges ranges; // the ranges in which this register is live
VNInfoList valnos; // value#'s
@@ -134,7 +216,7 @@ namespace llvm {
};
LiveInterval(unsigned Reg, float Weight, bool IsSS = false)
- : reg(Reg), weight(Weight), preference(0) {
+ : reg(Reg), weight(Weight) {
if (IsSS)
reg = reg | (1U << (sizeof(unsigned)*CHAR_BIT-1));
}
@@ -210,15 +292,17 @@ namespace llvm {
void copyValNumInfo(VNInfo *DstValNo, const VNInfo *SrcValNo) {
DstValNo->def = SrcValNo->def;
DstValNo->copy = SrcValNo->copy;
- DstValNo->hasPHIKill = SrcValNo->hasPHIKill;
- DstValNo->redefByEC = SrcValNo->redefByEC;
+ DstValNo->setFlags(SrcValNo->getFlags());
DstValNo->kills = SrcValNo->kills;
}
/// getNextValue - Create a new value number and return it. MIIdx specifies
/// the instruction that defines the value number.
VNInfo *getNextValue(unsigned MIIdx, MachineInstr *CopyMI,
- BumpPtrAllocator &VNInfoAllocator) {
+ bool isDefAccurate, BumpPtrAllocator &VNInfoAllocator) {
+
+ assert(MIIdx != ~0u && MIIdx != ~1u &&
+ "PHI def / unused flags should now be passed explicitly.");
#ifdef __GNUC__
unsigned Alignment = (unsigned)__alignof__(VNInfo);
#else
@@ -229,6 +313,26 @@ namespace llvm {
static_cast<VNInfo*>(VNInfoAllocator.Allocate((unsigned)sizeof(VNInfo),
Alignment));
new (VNI) VNInfo((unsigned)valnos.size(), MIIdx, CopyMI);
+ VNI->setIsDefAccurate(isDefAccurate);
+ valnos.push_back(VNI);
+ return VNI;
+ }
+
+ /// Create a copy of the given value. The new value will be identical except
+ /// for the Value number.
+ VNInfo *createValueCopy(const VNInfo *orig, BumpPtrAllocator &VNInfoAllocator) {
+
+#ifdef __GNUC__
+ unsigned Alignment = (unsigned)__alignof__(VNInfo);
+#else
+ // FIXME: ugly.
+ unsigned Alignment = 8;
+#endif
+ VNInfo *VNI =
+ static_cast<VNInfo*>(VNInfoAllocator.Allocate((unsigned)sizeof(VNInfo),
+ Alignment));
+
+ new (VNI) VNInfo((unsigned)valnos.size(), *orig);
valnos.push_back(VNI);
return VNI;
}
@@ -339,7 +443,8 @@ namespace llvm {
/// Copy - Copy the specified live interval. This copies all the fields
/// except for the register of the interval.
- void Copy(const LiveInterval &RHS, BumpPtrAllocator &VNInfoAllocator);
+ void Copy(const LiveInterval &RHS, MachineRegisterInfo *MRI,
+ BumpPtrAllocator &VNInfoAllocator);
bool empty() const { return ranges.empty(); }
@@ -416,7 +521,8 @@ namespace llvm {
/// the intervals are not joinable, this aborts.
void join(LiveInterval &Other, const int *ValNoAssignments,
const int *RHSValNoAssignments,
- SmallVector<VNInfo*, 16> &NewVNInfo);
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI);
    /// isInOneLiveRange - Return true if the range specified is entirely in
    /// a single LiveRange of the live interval.
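
A small sketch of reading a value number under the new flag accessors; the
helper function is hypothetical:

    #include "llvm/CodeGen/LiveInterval.h"
    using namespace llvm;

    void inspectValue(const VNInfo *VNI) {
      if (VNI->isUnused())
        return;                        // Value number not currently in use.
      if (VNI->isDefAccurate()) {
        unsigned DefIdx = VNI->def;    // Index of the defining MachineInstr.
        (void)DefIdx;
      } else if (VNI->isPHIDef()) {
        unsigned MBBIdx = VNI->def;    // Index of the MBB the PHI lived in.
        (void)MBBIdx;
      }
      // Otherwise def is not meaningful and should not be consulted.
    }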
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index a110e5846ac9..0074f1a5f2ca 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -19,7 +19,7 @@
#define LLVM_CODEGEN_MACHINEFUNCTION_H
#include "llvm/ADT/ilist.h"
-#include "llvm/CodeGen/DebugLoc.h"
+#include "llvm/Support/DebugLoc.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/Annotation.h"
#include "llvm/Support/Allocator.h"
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index d61e5d8a5eb4..2b2f24a88371 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Target/TargetInstrDesc.h"
-#include "llvm/CodeGen/DebugLoc.h"
+#include "llvm/Support/DebugLoc.h"
#include <list>
#include <vector>
@@ -104,7 +104,7 @@ public:
/// getDebugLoc - Returns the debug location id of this MachineInstr.
///
- const DebugLoc getDebugLoc() const { return debugLoc; }
+ DebugLoc getDebugLoc() const { return debugLoc; }
/// getDesc - Returns the target instruction descriptor of this
/// MachineInstr.
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 7a4168447b62..ba538d795ffd 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -33,15 +33,15 @@ class raw_ostream;
class MachineOperand {
public:
enum MachineOperandType {
- MO_Register, ///< Register operand.
- MO_Immediate, ///< Immediate operand
- MO_FPImmediate, ///< Floating-point immediate operand
- MO_MachineBasicBlock, ///< MachineBasicBlock reference
- MO_FrameIndex, ///< Abstract Stack Frame Index
- MO_ConstantPoolIndex, ///< Address of indexed Constant in Constant Pool
- MO_JumpTableIndex, ///< Address of indexed Jump Table for switch
- MO_ExternalSymbol, ///< Name of external global symbol
- MO_GlobalAddress ///< Address of a global value
+ MO_Register, ///< Register operand.
+ MO_Immediate, ///< Immediate operand
+ MO_FPImmediate, ///< Floating-point immediate operand
+ MO_MachineBasicBlock, ///< MachineBasicBlock reference
+ MO_FrameIndex, ///< Abstract Stack Frame Index
+ MO_ConstantPoolIndex, ///< Address of indexed Constant in Constant Pool
+ MO_JumpTableIndex, ///< Address of indexed Jump Table for switch
+ MO_ExternalSymbol, ///< Name of external global symbol
+ MO_GlobalAddress ///< Address of a global value
};
private:
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 02f9b7c686e2..80c37b39ca0c 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -37,6 +37,15 @@ class MachineRegisterInfo {
/// virtual registers. For each target register class, it keeps a list of
/// virtual registers belonging to the class.
std::vector<std::vector<unsigned> > RegClass2VRegMap;
+
+ /// RegAllocHints - This vector records register allocation hints for virtual
+ /// registers. For each virtual register, it keeps a register and hint type
+ /// pair making up the allocation hint. Hint type is target specific except
+ /// for the value 0 which means the second value of the pair is the preferred
+ /// register for allocation. For example, if the hint is <0, 1024>, it means
+ /// the allocator should prefer the physical register allocated to the virtual
+ /// register of the hint.
+ std::vector<std::pair<unsigned, unsigned> > RegAllocHints;
/// PhysRegUseDefLists - This is an array of the head of the use/def list for
/// physical registers.
@@ -170,7 +179,25 @@ public:
std::vector<unsigned> &getRegClassVirtRegs(const TargetRegisterClass *RC) {
return RegClass2VRegMap[RC->getID()];
}
-
+
+ /// setRegAllocationHint - Specify a register allocation hint for the
+ /// specified virtual register.
+ void setRegAllocationHint(unsigned Reg, unsigned Type, unsigned PrefReg) {
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ assert(Reg < VRegInfo.size() && "Invalid vreg!");
+ RegAllocHints[Reg].first = Type;
+ RegAllocHints[Reg].second = PrefReg;
+ }
+
+ /// getRegAllocationHint - Return the register allocation hint for the
+ /// specified virtual register.
+ std::pair<unsigned, unsigned>
+ getRegAllocationHint(unsigned Reg) const {
+ Reg -= TargetRegisterInfo::FirstVirtualRegister;
+ assert(Reg < VRegInfo.size() && "Invalid vreg!");
+ return RegAllocHints[Reg];
+ }
+
//===--------------------------------------------------------------------===//
// Physical Register Use Info
//===--------------------------------------------------------------------===//
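A minimal usage sketch of the new hint API (VirtReg, PhysReg and PreferredReg
are placeholder names, not part of this patch):

  MachineRegisterInfo &MRI = MF.getRegInfo();
  // Hint type 0 is the generic "preferred register" hint.
  MRI.setRegAllocationHint(VirtReg, 0, PhysReg);

  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
  unsigned PreferredReg = 0;
  if (Hint.first == 0 && Hint.second)
    PreferredReg = Hint.second;  // the allocator should try this one first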
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index dd76fcc11b5e..7f2c8bc36840 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -153,6 +153,8 @@ namespace RTLIB {
FPROUND_PPCF128_F32,
FPROUND_F80_F64,
FPROUND_PPCF128_F64,
+ FPTOSINT_F32_I8,
+ FPTOSINT_F32_I16,
FPTOSINT_F32_I32,
FPTOSINT_F32_I64,
FPTOSINT_F32_I128,
@@ -165,6 +167,8 @@ namespace RTLIB {
FPTOSINT_PPCF128_I32,
FPTOSINT_PPCF128_I64,
FPTOSINT_PPCF128_I128,
+ FPTOUINT_F32_I8,
+ FPTOUINT_F32_I16,
FPTOUINT_F32_I32,
FPTOUINT_F32_I64,
FPTOUINT_F32_I128,
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index ad485103fb7e..1b6fecd556e1 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -31,7 +31,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/CodeGen/DebugLoc.h"
+#include "llvm/Support/DebugLoc.h"
#include <cassert>
#include <climits>
diff --git a/include/llvm/Config/AsmPrinters.def.in b/include/llvm/Config/AsmPrinters.def.in
new file mode 100644
index 000000000000..9729bd75eb40
--- /dev/null
+++ b/include/llvm/Config/AsmPrinters.def.in
@@ -0,0 +1,29 @@
+//===- llvm/Config/AsmPrinters.def - LLVM Assembly Printers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file enumerates all of the assembly-language printers
+// supported by this build of LLVM. Clients of this file should define
+// the LLVM_ASM_PRINTER macro to be a function-like macro with a
+// single parameter (the name of the target whose assembly can be
+// generated); including this file will then enumerate all of the
+// targets with assembly printers.
+//
+// The set of targets supported by LLVM is generated at configuration
+// time, at which point this header is generated. Do not modify this
+// header directly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASM_PRINTER
+# error Please define the macro LLVM_ASM_PRINTER(TargetName)
+#endif
+
+@LLVM_ENUM_ASM_PRINTERS@
+
+#undef LLVM_ASM_PRINTER
diff --git a/include/llvm/Config/Targets.def.in b/include/llvm/Config/Targets.def.in
new file mode 100644
index 000000000000..a3884729e118
--- /dev/null
+++ b/include/llvm/Config/Targets.def.in
@@ -0,0 +1,28 @@
+//===- llvm/Config/Targets.def - LLVM Target Architectures ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file enumerates all of the target architectures supported by
+// this build of LLVM. Clients of this file should define the
+// LLVM_TARGET macro to be a function-like macro with a single
+// parameter (the name of the target); including this file will then
+// enumerate all of the targets.
+//
+// The set of targets supported by LLVM is generated at configuration
+// time, at which point this header is generated. Do not modify this
+// header directly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET
+# error Please define the macro LLVM_TARGET(TargetName)
+#endif
+
+@LLVM_ENUM_TARGETS@
+
+#undef LLVM_TARGET
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 33e2e002e231..c59ed23d8b25 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -580,3 +580,6 @@
/* Define to a function implementing strdup */
#cmakedefine strdup ${strdup}
+
+/* Native LLVM architecture */
+#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index eee2f946148b..ac60f4dc3a50 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -285,6 +285,9 @@
/* Have pthread_mutex_lock */
#undef HAVE_PTHREAD_MUTEX_LOCK
+/* Have pthread_rwlock_init */
+#undef HAVE_PTHREAD_RWLOCK_INIT
+
/* Define to 1 if srand48/lrand48/drand48 exist in <stdlib.h> */
#undef HAVE_RAND48
@@ -473,6 +476,9 @@
/* Build multithreading support into LLVM */
#undef LLVM_MULTITHREADED
+/* LLVM architecture name for the native architecture, if available */
+#undef LLVM_NATIVE_ARCH
+
/* Define if this is Unixish platform */
#undef LLVM_ON_UNIX
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index ed0fe2740f92..52fff2020c74 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -102,19 +102,28 @@ public:
return CreateTrueFalseVals(false);
}
- /// Return a ConstantInt with the specified value for the specified type. The
- /// value V will be canonicalized to an unsigned APInt. Accessing it with
- /// either getSExtValue() or getZExtValue() will yield a correctly sized and
- /// signed value for the type Ty.
+ /// Return a ConstantInt with the specified integer value for the specified
+ /// type. If the type is wider than 64 bits, the value will be zero-extended
+ /// to fit the type, unless isSigned is true, in which case the value will
+ /// be interpreted as a 64-bit signed integer and sign-extended to fit
+ /// the type.
/// @brief Get a ConstantInt for a specific value.
- static ConstantInt *get(const Type *Ty, uint64_t V, bool isSigned = false);
+ static ConstantInt *get(const IntegerType *Ty,
+ uint64_t V, bool isSigned = false);
+
+ /// If Ty is a vector type, return a Constant with a splat of the given
+ /// value. Otherwise return a ConstantInt for the given value.
+ static Constant *get(const Type *Ty, uint64_t V, bool isSigned = false);
/// Return a ConstantInt with the specified value for the specified type. The
/// value V will be canonicalized to an unsigned APInt. Accessing it with
/// either getSExtValue() or getZExtValue() will yield a correctly sized and
/// signed value for the type Ty.
/// @brief Get a ConstantInt for a specific signed value.
- static ConstantInt *getSigned(const Type *Ty, int64_t V) {
+ static ConstantInt *getSigned(const IntegerType *Ty, int64_t V) {
+ return get(Ty, V, true);
+ }
+ static Constant *getSigned(const Type *Ty, int64_t V) {
return get(Ty, V, true);
}
@@ -122,6 +131,10 @@ public:
/// type is the integer type that corresponds to the bit width of the value.
static ConstantInt *get(const APInt &V);
+ /// If Ty is a vector type, return a Constant with a splat of the given
+ /// value. Otherwise return a ConstantInt for the given value.
+ static Constant *get(const Type *Ty, const APInt &V);
+
/// getType - Specialize the getType() method to always return an IntegerType,
/// which reduces the amount of casting needed in parts of the compiler.
///
@@ -248,10 +261,11 @@ public:
/// get() - Static factory methods - Return objects of the specified value
static ConstantFP *get(const APFloat &V);
- /// get() - This returns a constant fp for the specified value in the
- /// specified type. This should only be used for simple constant values like
- /// 2.0/1.0 etc, that are known-valid both as double and as the target format.
- static ConstantFP *get(const Type *Ty, double V);
+ /// get() - This returns a ConstantFP, or a vector containing a splat of a
+ /// ConstantFP, for the specified value in the specified type. This should
+ /// only be used for simple constant values like 2.0/1.0 etc, that are
+ /// known-valid both as host double and as the target format.
+ static Constant *get(const Type *Ty, double V);
/// isValueValidForType - return true if Ty is big enough to represent V.
static bool isValueValidForType(const Type *Ty, const APFloat& V);
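To make the overload split above concrete, a sketch using the static primitive
types available in this tree (the exact spellings of Type::Int32Ty and
Type::FloatTy are assumptions, not part of this patch):

  // Scalar integer type: produces a plain ConstantInt.
  Constant *C = ConstantInt::get(Type::Int32Ty, 42);
  // Vector type: produces a splat, i.e. <i32 42, i32 42, i32 42, i32 42>.
  Constant *S = ConstantInt::get(VectorType::get(Type::Int32Ty, 4), 42);
  // Likewise for the new ConstantFP::get: a splat of <4 x float> 2.0.
  Constant *F = ConstantFP::get(VectorType::get(Type::FloatTy, 4), 2.0);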
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index fa951bf70d05..b5824f8f8156 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -50,6 +50,10 @@ protected:
///
void dropAllTypeUses();
+ /// unlockedRefineAbstractTypeTo - Internal version of refineAbstractTypeTo
+ /// that performs no locking. Only used for internal recursion.
+ void unlockedRefineAbstractTypeTo(const Type *NewType);
+
public:
//===--------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
new file mode 100644
index 000000000000..cadc23ab7109
--- /dev/null
+++ b/include/llvm/MC/MCInst.h
@@ -0,0 +1,125 @@
+//===-- llvm/MC/MCInst.h - MCInst class -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCInst and MCOperand classes,
+// which together form the basic representation of low-level machine code
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_MC_MCINST_H
+#define LLVM_MC_MCINST_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+/// MCOperand - Instances of this class represent operands of the MCInst class.
+/// This is a simple discriminated union.
+class MCOperand {
+ enum MachineOperandType {
+ kInvalid, ///< Uninitialized.
+ kRegister, ///< Register operand.
+ kImmediate, ///< Immediate operand.
+ kMBBLabel ///< Basic block label.
+ };
+ unsigned char Kind;
+
+ union {
+ unsigned RegVal;
+ int64_t ImmVal;
+ struct {
+ unsigned FunctionNo;
+ unsigned BlockNo;
+ } MBBLabel;
+ };
+public:
+
+ MCOperand() : Kind(kInvalid) {}
+ MCOperand(const MCOperand &RHS) { *this = RHS; }
+
+ bool isReg() const { return Kind == kRegister; }
+ bool isImm() const { return Kind == kImmediate; }
+ bool isMBBLabel() const { return Kind == kMBBLabel; }
+
+ /// getReg - Returns the register number.
+ unsigned getReg() const {
+ assert(isReg() && "This is not a register operand!");
+ return RegVal;
+ }
+
+ /// setReg - Set the register number.
+ void setReg(unsigned Reg) {
+ assert(isReg() && "This is not a register operand!");
+ RegVal = Reg;
+ }
+
+ int64_t getImm() const {
+ assert(isImm() && "This is not an immediate");
+ return ImmVal;
+ }
+ void setImm(int64_t Val) {
+ assert(isImm() && "This is not an immediate");
+ ImmVal = Val;
+ }
+
+ unsigned getMBBLabelFunction() const {
+ assert(isMBBLabel() && "Wrong accessor");
+ return MBBLabel.FunctionNo;
+ }
+ unsigned getMBBLabelBlock() const {
+ assert(isMBBLabel() && "Wrong accessor");
+ return MBBLabel.BlockNo;
+ }
+
+ void MakeReg(unsigned Reg) {
+ Kind = kRegister;
+ RegVal = Reg;
+ }
+ void MakeImm(int64_t Val) {
+ Kind = kImmediate;
+ ImmVal = Val;
+ }
+ void MakeMBBLabel(unsigned Fn, unsigned MBB) {
+ Kind = kMBBLabel;
+ MBBLabel.FunctionNo = Fn;
+ MBBLabel.BlockNo = MBB;
+ }
+};
+
+
+/// MCInst - Instances of this class represent a single low-level machine
+/// instruction.
+class MCInst {
+ unsigned Opcode;
+ SmallVector<MCOperand, 8> Operands;
+public:
+ MCInst() : Opcode(~0U) {}
+
+ void setOpcode(unsigned Op) { Opcode = Op; }
+
+ unsigned getOpcode() const { return Opcode; }
+ DebugLoc getDebugLoc() const { return DebugLoc(); }
+
+ const MCOperand &getOperand(unsigned i) const { return Operands[i]; }
+ MCOperand &getOperand(unsigned i) { return Operands[i]; }
+
+ void addOperand(const MCOperand &Op) {
+ Operands.push_back(Op);
+ }
+
+};
+
+
+} // end namespace llvm
+
+#endif
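As a quick sketch of the API declared above, building an instruction by hand
might look like this (opcode 42 and the register numbers are made up for
illustration):

  MCInst Inst;
  Inst.setOpcode(42);        // hypothetical target opcode
  MCOperand Dst, Src, Imm;
  Dst.MakeReg(1);            // hypothetical register numbers
  Src.MakeReg(2);
  Imm.MakeImm(100);
  Inst.addOperand(Dst);
  Inst.addOperand(Src);
  Inst.addOperand(Imm);
  // At this point Inst.getOperand(2).isImm() holds and getImm() returns 100.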
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index fa3b8701d349..3ae50136e4a9 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -539,7 +539,7 @@ template<>
class parser<bool> : public basic_parser<bool> {
const char *ArgStr;
public:
-
+
// parse - Return true on error.
bool parse(Option &O, const char *ArgName, const std::string &Arg, bool &Val);
@@ -1105,7 +1105,7 @@ public:
}
};
-// multi_arg - Modifier to set the number of additional values.
+// multi_val - Modifier to set the number of additional values.
struct multi_val {
unsigned AdditionalVals;
explicit multi_val(unsigned N) : AdditionalVals(N) {}
diff --git a/include/llvm/CodeGen/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index 77e6733f696a..5c089efc98ce 100644
--- a/include/llvm/CodeGen/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -1,4 +1,4 @@
-//===---- llvm/CodeGen/DebugLoc.h - Debug Location Information --*- C++ -*-===//
+//===---- llvm/Support/DebugLoc.h - Debug Location Information --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a number of light weight data structures used by the code
-// generator to describe and track debug location information.
+// This file defines a number of light weight data structures used
+// to describe and track debug location information.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_DEBUGLOC_H
-#define LLVM_CODEGEN_DEBUGLOC_H
+#ifndef LLVM_DEBUGLOC_H
+#define LLVM_DEBUGLOC_H
#include "llvm/ADT/DenseMap.h"
#include <vector>
@@ -98,4 +98,4 @@ namespace llvm {
} // end namespace llvm
-#endif /* LLVM_CODEGEN_DEBUGLOC_H */
+#endif /* LLVM_DEBUGLOC_H */
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 7942de7857a2..ed6a3f19ef7a 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -17,6 +17,7 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Support/ConstantFolder.h"
@@ -202,7 +203,7 @@ public:
Value *CreateFMul(Value *LHS, Value *RHS, const char *Name = "") {
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
- return Folder.CreateMul(LC, RC);
+ return Folder.CreateFMul(LC, RC);
return Insert(BinaryOperator::CreateFMul(LHS, RHS), Name);
}
Value *CreateUDiv(Value *LHS, Value *RHS, const char *Name = "") {
@@ -291,6 +292,11 @@ public:
return Folder.CreateNeg(VC);
return Insert(BinaryOperator::CreateNeg(V), Name);
}
+ Value *CreateFNeg(Value *V, const char *Name = "") {
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Folder.CreateFNeg(VC);
+ return Insert(BinaryOperator::CreateFNeg(V), Name);
+ }
Value *CreateNot(Value *V, const char *Name = "") {
if (Constant *VC = dyn_cast<Constant>(V))
return Folder.CreateNot(VC);
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index 619cc2055250..4fc648319ad4 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -15,6 +15,7 @@
#define LLVM_SUPPORT_MANAGED_STATIC_H
#include "llvm/System/Atomic.h"
+#include "llvm/System/Threading.h"
namespace llvm {
@@ -60,28 +61,28 @@ public:
// Accessors.
C &operator*() {
void* tmp = Ptr;
- sys::MemoryFence();
+ if (llvm_is_multithreaded()) sys::MemoryFence();
if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
return *static_cast<C*>(Ptr);
}
C *operator->() {
void* tmp = Ptr;
- sys::MemoryFence();
+ if (llvm_is_multithreaded()) sys::MemoryFence();
if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
return static_cast<C*>(Ptr);
}
const C &operator*() const {
void* tmp = Ptr;
- sys::MemoryFence();
+ if (llvm_is_multithreaded()) sys::MemoryFence();
if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
return *static_cast<C*>(Ptr);
}
const C *operator->() const {
void* tmp = Ptr;
- sys::MemoryFence();
+ if (llvm_is_multithreaded()) sys::MemoryFence();
if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
return static_cast<C*>(Ptr);
@@ -94,13 +95,6 @@ public:
void Register() { RegisterManagedStatic(0, CleanupFn); }
};
-
-/// llvm_start_multithreaded - Allocate and initialize structures needed to
-/// make LLVM safe for multithreading. The return value indicates whether
-/// multithreaded initialization succeeded. LLVM will still be operational
-/// on "failed" return, but will not be safe to run multithreaded.
-bool llvm_start_multithreaded();
-
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
void llvm_shutdown();
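The pattern these accessors support, sketched for a hypothetical lazily
constructed registry (the fence is only paid once llvm_start_multithreaded()
has been called):

  static ManagedStatic<std::vector<std::string> > Registry;

  void RegisterName(const std::string &N) {
    Registry->push_back(N);  // operator-> constructs the vector on first use
  }
  // At process exit, llvm_shutdown() destroys all ManagedStatics.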
diff --git a/utils/TableGen/TGSourceMgr.h b/include/llvm/Support/SourceMgr.h
index 69fb74ca20c6..25775cb74419 100644
--- a/utils/TableGen/TGSourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -1,4 +1,4 @@
-//===- TGSourceMgr.h - Manager for Source Buffers & Diagnostics -*- C++ -*-===//
+//===- SourceMgr.h - Manager for Source Buffers & Diagnostics ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,14 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the TGSourceMgr class.
+// This file declares the SourceMgr class. This class is used as a simple
+// substrate for diagnostics, #include handling, and other low level things for
+// simple parsers.
//
//===----------------------------------------------------------------------===//
-#ifndef TGSOURCEMGR_H
-#define TGSOURCEMGR_H
+#ifndef SUPPORT_SOURCEMGR_H
+#define SUPPORT_SOURCEMGR_H
#include <string>
#include <vector>
@@ -20,46 +22,54 @@
namespace llvm {
class MemoryBuffer;
- class TGSourceMgr;
+ class SourceMgr;
-class TGLoc {
+class SMLoc {
const char *Ptr;
public:
- TGLoc() : Ptr(0) {}
- TGLoc(const TGLoc &RHS) : Ptr(RHS.Ptr) {}
+ SMLoc() : Ptr(0) {}
+ SMLoc(const SMLoc &RHS) : Ptr(RHS.Ptr) {}
- bool operator==(const TGLoc &RHS) const { return RHS.Ptr == Ptr; }
- bool operator!=(const TGLoc &RHS) const { return RHS.Ptr != Ptr; }
+ bool operator==(const SMLoc &RHS) const { return RHS.Ptr == Ptr; }
+ bool operator!=(const SMLoc &RHS) const { return RHS.Ptr != Ptr; }
const char *getPointer() const { return Ptr; }
- static TGLoc getFromPointer(const char *Ptr) {
- TGLoc L;
+ static SMLoc getFromPointer(const char *Ptr) {
+ SMLoc L;
L.Ptr = Ptr;
return L;
}
};
-/// TGSourceMgr - This owns the files read by tblgen, handles include stacks,
+/// SourceMgr - This owns the files read by a parser, handles include stacks,
/// and handles printing of diagnostics.
-class TGSourceMgr {
+class SourceMgr {
struct SrcBuffer {
/// Buffer - The memory buffer for the file.
MemoryBuffer *Buffer;
/// IncludeLoc - This is the location of the parent include, or null if at
/// the top level.
- TGLoc IncludeLoc;
+ SMLoc IncludeLoc;
};
/// Buffers - This is all of the buffers that we are reading from.
std::vector<SrcBuffer> Buffers;
- TGSourceMgr(const TGSourceMgr&); // DO NOT IMPLEMENT
- void operator=(const TGSourceMgr&); // DO NOT IMPLEMENT
+ // IncludeDirectories - This is the list of directories we should search for
+ // include files in.
+ std::vector<std::string> IncludeDirectories;
+
+ SourceMgr(const SourceMgr&); // DO NOT IMPLEMENT
+ void operator=(const SourceMgr&); // DO NOT IMPLEMENT
public:
- TGSourceMgr() {}
- ~TGSourceMgr();
+ SourceMgr() {}
+ ~SourceMgr();
+
+ void setIncludeDirs(const std::vector<std::string> &Dirs) {
+ IncludeDirectories = Dirs;
+ }
const SrcBuffer &getBufferInfo(unsigned i) const {
assert(i < Buffers.size() && "Invalid Buffer ID!");
@@ -71,12 +81,12 @@ public:
return Buffers[i].Buffer;
}
- TGLoc getParentIncludeLoc(unsigned i) const {
+ SMLoc getParentIncludeLoc(unsigned i) const {
assert(i < Buffers.size() && "Invalid Buffer ID!");
return Buffers[i].IncludeLoc;
}
- unsigned AddNewSourceBuffer(MemoryBuffer *F, TGLoc IncludeLoc) {
+ unsigned AddNewSourceBuffer(MemoryBuffer *F, SMLoc IncludeLoc) {
SrcBuffer NB;
NB.Buffer = F;
NB.IncludeLoc = IncludeLoc;
@@ -84,21 +94,25 @@ public:
return Buffers.size()-1;
}
+ /// AddIncludeFile - Search for a file with the specified name in the current
+ /// directory or in one of the IncludeDirs. If no file is found, this returns
+ /// ~0, otherwise it returns the buffer ID of the stacked file.
+ unsigned AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc);
+
/// FindBufferContainingLoc - Return the ID of the buffer containing the
/// specified location, returning -1 if not found.
- int FindBufferContainingLoc(TGLoc Loc) const;
+ int FindBufferContainingLoc(SMLoc Loc) const;
/// FindLineNumber - Find the line number for the specified location in the
/// specified file. This is not a fast method.
- unsigned FindLineNumber(TGLoc Loc, int BufferID = -1) const;
-
+ unsigned FindLineNumber(SMLoc Loc, int BufferID = -1) const;
- /// PrintError - Emit an error message about the specified location with the
+ /// PrintMessage - Emit a message about the specified location with the
/// specified string.
- void PrintError(TGLoc ErrorLoc, const std::string &Msg) const;
+ void PrintMessage(SMLoc Loc, const std::string &Msg) const;
private:
- void PrintIncludeStack(TGLoc IncludeLoc) const;
+ void PrintIncludeStack(SMLoc IncludeLoc) const;
};
} // end llvm namespace
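A minimal sketch of the renamed class in use; Buf is a MemoryBuffer* obtained
elsewhere and MainLoc is an SMLoc into it (both placeholders):

  SourceMgr SrcMgr;
  std::vector<std::string> IncludeDirs;
  IncludeDirs.push_back("include");
  SrcMgr.setIncludeDirs(IncludeDirs);

  unsigned MainID = SrcMgr.AddNewSourceBuffer(Buf, SMLoc());
  unsigned IncID  = SrcMgr.AddIncludeFile("defs.td", MainLoc);
  if (IncID == ~0U)
    SrcMgr.PrintMessage(MainLoc, "could not open include file 'defs.td'");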
diff --git a/include/llvm/System/Atomic.h b/include/llvm/System/Atomic.h
index cb9277cc35ec..adbb975298e8 100644
--- a/include/llvm/System/Atomic.h
+++ b/include/llvm/System/Atomic.h
@@ -24,6 +24,8 @@ namespace llvm {
cas_flag CompareAndSwap(volatile cas_flag* ptr,
cas_flag new_value,
cas_flag old_value);
+ cas_flag AtomicIncrement(volatile cas_flag* ptr);
+ cas_flag AtomicDecrement(volatile cas_flag* ptr);
}
}
diff --git a/include/llvm/System/Mutex.h b/include/llvm/System/Mutex.h
index 4f3849341aa1..d2c457dbc91c 100644
--- a/include/llvm/System/Mutex.h
+++ b/include/llvm/System/Mutex.h
@@ -14,12 +14,15 @@
#ifndef LLVM_SYSTEM_MUTEX_H
#define LLVM_SYSTEM_MUTEX_H
+#include "llvm/System/Threading.h"
+#include <cassert>
+
namespace llvm
{
namespace sys
{
/// @brief Platform agnostic Mutex class.
- class Mutex
+ class MutexImpl
{
/// @name Constructors
/// @{
@@ -30,11 +33,11 @@ namespace llvm
/// also more likely to deadlock (same thread can't acquire more than
/// once).
/// @brief Default Constructor.
- explicit Mutex(bool recursive = true);
+ explicit MutexImpl(bool recursive = true);
/// Releases and removes the lock
/// @brief Destructor
- ~Mutex();
+ ~MutexImpl();
/// @}
/// @name Methods
@@ -66,18 +69,81 @@ namespace llvm
/// @name Platform Dependent Data
/// @{
private:
-#ifdef ENABLE_THREADS
void* data_; ///< We don't know what the data will be
-#endif
/// @}
/// @name Do Not Implement
/// @{
private:
- Mutex(const Mutex & original);
- void operator=(const Mutex &);
+ MutexImpl(const MutexImpl & original);
+ void operator=(const MutexImpl &);
/// @}
};
+
+
+ /// SmartMutex - A mutex with a compile time constant parameter that
+ /// indicates whether this mutex should become a no-op when we're not
+ /// running in multithreaded mode.
+ template<bool mt_only>
+ class SmartMutex : public MutexImpl {
+ unsigned acquired;
+ bool recursive;
+ public:
+ explicit SmartMutex(bool rec = true) :
+ MutexImpl(rec), acquired(0), recursive(rec) { }
+
+ bool acquire() {
+ if (!mt_only || llvm_is_multithreaded())
+ return MutexImpl::acquire();
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides sanity checking in single-threaded mode.
+ assert((recursive || acquired == 0) && "Lock already acquired!!");
+ ++acquired;
+ return true;
+ }
+
+ bool release() {
+ if (!mt_only || llvm_is_multithreaded())
+ return MutexImpl::release();
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides sanity checking in single-threaded mode.
+ assert(((recursive && acquired) || (acquired == 1)) &&
+ "Lock not acquired before release!");
+ --acquired;
+ return true;
+ }
+
+ bool tryacquire() {
+ if (!mt_only || llvm_is_multithreaded())
+ return MutexImpl::tryacquire();
+ return true;
+ }
+
+ private:
+ SmartMutex(const SmartMutex<mt_only> & original);
+ void operator=(const SmartMutex<mt_only> &);
+ };
+
+ /// Mutex - A standard, always enforced mutex.
+ typedef SmartMutex<false> Mutex;
+
+ template<bool mt_only>
+ class SmartScopedLock {
+ SmartMutex<mt_only>* mtx;
+
+ public:
+ SmartScopedLock(SmartMutex<mt_only>* m) : mtx(m) {
+ mtx->acquire();
+ }
+
+ ~SmartScopedLock() {
+ mtx->release();
+ }
+ };
+
+ typedef SmartScopedLock<false> ScopedLock;
}
}
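Intended usage, as a sketch; note that SmartScopedLock takes a pointer, per
the constructor above:

  static sys::SmartMutex<true> CounterLock;  // no-op until multithreaded
  static unsigned Counter = 0;

  unsigned NextID() {
    sys::SmartScopedLock<true> Guard(&CounterLock);
    return ++Counter;
  }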
diff --git a/include/llvm/System/Path.h b/include/llvm/System/Path.h
index de2f173ae417..05be2212758b 100644
--- a/include/llvm/System/Path.h
+++ b/include/llvm/System/Path.h
@@ -309,6 +309,11 @@ namespace sys {
/// @brief Determine if the path is absolute.
bool isAbsolute() const;
+ /// This function determines if the path name is absolute, as opposed to
+ /// relative.
+ /// @brief Determine if the path is absolute.
+ static bool isAbsolute(const char *NameStart, unsigned NameLen);
+
/// This function opens the file associated with the path name provided by
/// the Path object and reads its magic number. If the magic number at the
/// start of the file matches \p magic, true is returned. In all other
diff --git a/include/llvm/System/RWMutex.h b/include/llvm/System/RWMutex.h
new file mode 100644
index 000000000000..e577d457afb5
--- /dev/null
+++ b/include/llvm/System/RWMutex.h
@@ -0,0 +1,175 @@
+//===- RWMutex.h - Reader/Writer Mutual Exclusion Lock ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_RWMUTEX_H
+#define LLVM_SYSTEM_RWMUTEX_H
+
+#include "llvm/System/Threading.h"
+#include <cassert>
+
+namespace llvm
+{
+ namespace sys
+ {
+ /// @brief Platform agnostic RWMutex class.
+ class RWMutexImpl
+ {
+ /// @name Constructors
+ /// @{
+ public:
+
+ /// Initializes the lock but doesn't acquire it.
+ /// @brief Default Constructor.
+ explicit RWMutexImpl();
+
+ /// Releases and removes the lock
+ /// @brief Destructor
+ ~RWMutexImpl();
+
+ /// @}
+ /// @name Methods
+ /// @{
+ public:
+
+ /// Attempts to unconditionally acquire the lock in reader mode. If the
+ /// lock is held by a writer, this method will wait until it can acquire
+ /// the lock.
+ /// @returns false if any kind of error occurs, true otherwise.
+ /// @brief Unconditionally acquire the lock in reader mode.
+ bool reader_acquire();
+
+ /// Attempts to release the lock in reader mode.
+ /// @returns false if any kind of error occurs, true otherwise.
+ /// @brief Unconditionally release the lock in reader mode.
+ bool reader_release();
+
+ /// Attempts to unconditionally acquire the lock in writer mode. If the
+ /// lock is held by any readers, this method will wait until it can
+ /// acquire the lock.
+ /// @returns false if any kind of error occurs, true otherwise.
+ /// @brief Unconditionally acquire the lock in writer mode.
+ bool writer_acquire();
+
+ /// Attempts to release the lock in writer mode.
+ /// @returns false if any kind of error occurs, true otherwise.
+ /// @brief Unconditionally release the lock in writer mode.
+ bool writer_release();
+
+ /// @}
+ /// @name Platform Dependent Data
+ /// @{
+ private:
+ void* data_; ///< We don't know what the data will be
+
+ /// @}
+ /// @name Do Not Implement
+ /// @{
+ private:
+ RWMutexImpl(const RWMutexImpl & original);
+ void operator=(const RWMutexImpl &);
+ /// @}
+ };
+
+ /// SmartRWMutex - An R/W mutex with a compile time constant parameter that
+ /// indicates whether this mutex should become a no-op when we're not
+ /// running in multithreaded mode.
+ template<bool mt_only>
+ class SmartRWMutex : public RWMutexImpl {
+ unsigned readers, writers;
+ public:
+ explicit SmartRWMutex() : RWMutexImpl(), readers(0), writers(0) { }
+
+ bool reader_acquire() {
+ if (!mt_only || llvm_is_multithreaded())
+ return RWMutexImpl::reader_acquire();
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides sanity checking in single-threaded mode.
+ ++readers;
+ return true;
+ }
+
+ bool reader_release() {
+ if (!mt_only || llvm_is_multithreaded())
+ return RWMutexImpl::reader_release();
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides sanity checking in single-threaded mode.
+ assert(readers > 0 && "Reader lock not acquired before release!");
+ --readers;
+ return true;
+ }
+
+ bool writer_acquire() {
+ if (!mt_only || llvm_is_multithreaded())
+ return RWMutexImpl::writer_acquire();
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides sanity checking in single-threaded mode.
+ assert(writers == 0 && "Writer lock already acquired!");
+ ++writers;
+ return true;
+ }
+
+ bool writer_release() {
+ if (!mt_only || llvm_is_multithreaded())
+ return RWMutexImpl::writer_release();
+
+ // Single-threaded debugging code. This would be racy in multithreaded
+ // mode, but provides sanity checking in single-threaded mode.
+ assert(writers == 1 && "Writer lock not acquired before release!");
+ --writers;
+ return true;
+ }
+
+ private:
+ SmartRWMutex(const SmartRWMutex<mt_only> & original);
+ void operator=(const SmartRWMutex<mt_only> &);
+ };
+ typedef SmartRWMutex<false> RWMutex;
+
+ /// ScopedReader - RAII acquisition of a reader lock
+ template<bool mt_only>
+ struct SmartScopedReader {
+ SmartRWMutex<mt_only>* mutex;
+
+ explicit SmartScopedReader(SmartRWMutex<mt_only>* m) {
+ mutex = m;
+ mutex->reader_acquire();
+ }
+
+ ~SmartScopedReader() {
+ mutex->reader_release();
+ }
+ };
+ typedef SmartScopedReader<false> ScopedReader;
+
+ /// ScopedWriter - RAII acquisition of a writer lock
+ template<bool mt_only>
+ struct SmartScopedWriter {
+ SmartRWMutex<mt_only>* mutex;
+
+ explicit SmartScopedWriter(SmartRWMutex<mt_only>* m) {
+ mutex = m;
+ mutex->writer_acquire();
+ }
+
+ ~SmartScopedWriter() {
+ mutex->writer_release();
+ }
+ };
+ typedef SmartScopedWriter<false> ScopedWriter;
+ }
+}
+
+#endif
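Typical usage of the reader/writer wrappers, sketched around a hypothetical
shared map:

  static sys::SmartRWMutex<true> MapLock;
  static std::map<std::string, int> TheMap;

  int Lookup(const std::string &K) {
    sys::SmartScopedReader<true> R(&MapLock);  // shared access
    std::map<std::string, int>::iterator I = TheMap.find(K);
    return I == TheMap.end() ? -1 : I->second;
  }

  void Update(const std::string &K, int V) {
    sys::SmartScopedWriter<true> W(&MapLock);  // exclusive access
    TheMap[K] = V;
  }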
diff --git a/include/llvm/System/Threading.h b/include/llvm/System/Threading.h
new file mode 100644
index 000000000000..42d2f89bcb82
--- /dev/null
+++ b/include/llvm/System/Threading.h
@@ -0,0 +1,45 @@
+//===-- llvm/System/Threading.h - Control multithreading mode --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_THREADING_H
+#define LLVM_SYSTEM_THREADING_H
+
+namespace llvm {
+ /// llvm_start_multithreaded - Allocate and initialize structures needed to
+ /// make LLVM safe for multithreading. The return value indicates whether
+ /// multithreaded initialization succeeded. LLVM will still be operational
+ /// on "failed" return, and will still be safe for hosting threading
+ /// applications in the JIT, but will not be safe for concurrent calls to the
+ /// LLVM APIs.
+ /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS.
+ bool llvm_start_multithreaded();
+
+ /// llvm_stop_multithreaded - Deallocate structures necessary to make LLVM
+ /// safe for multithreading.
+ /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS.
+ void llvm_stop_multithreaded();
+
+ /// llvm_is_multithreaded - Check whether LLVM is executing in thread-safe
+ /// mode or not.
+ bool llvm_is_multithreaded();
+
+ /// llvm_acquire_global_lock - Acquire the global lock. This is a no-op if called
+ /// before llvm_start_multithreaded().
+ void llvm_acquire_global_lock();
+
+ /// llvm_release_global_lock - Release the global lock. This is a no-op if called
+ /// before llvm_start_multithreaded().
+ void llvm_release_global_lock();
+}
+
+#endif
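The intended call pattern for a host application, sketched:

  int main() {
    // Must run before any other LLVM API call.
    if (!llvm::llvm_start_multithreaded()) {
      // Initialization "failed": LLVM still works, but calls into it must
      // not be made concurrently.
    }
    // ... use LLVM ...
    llvm::llvm_stop_multithreaded();  // again, in isolation
    return 0;
  }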
diff --git a/include/llvm/Target/DarwinTargetAsmInfo.h b/include/llvm/Target/DarwinTargetAsmInfo.h
index 6241ffe29b8f..171a6b3e1177 100644
--- a/include/llvm/Target/DarwinTargetAsmInfo.h
+++ b/include/llvm/Target/DarwinTargetAsmInfo.h
@@ -23,7 +23,7 @@ namespace llvm {
class Type;
class Mangler;
- struct DarwinTargetAsmInfo: public TargetAsmInfo {
+ struct DarwinTargetAsmInfo : public TargetAsmInfo {
const Section* TextCoalSection;
const Section* ConstTextCoalSection;
const Section* ConstDataCoalSection;
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 3f1cdd27ca39..ebd826a6f4a1 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -274,6 +274,7 @@ def unknown;
class Operand<ValueType ty> {
ValueType Type = ty;
string PrintMethod = "printOperand";
+ string AsmOperandLowerMethod = ?;
dag MIOperandInfo = (ops);
}
diff --git a/include/llvm/Target/TargetAsmInfo.h b/include/llvm/Target/TargetAsmInfo.h
index f223f4765f98..670b0996cc35 100644
--- a/include/llvm/Target/TargetAsmInfo.h
+++ b/include/llvm/Target/TargetAsmInfo.h
@@ -130,7 +130,6 @@ namespace llvm {
private:
mutable StringMap<Section> Sections;
mutable SectionFlags::FlagsStringsMapType FlagsStrings;
- void fillDefaultValues();
protected:
/// TM - The current TargetMachine.
const TargetMachine &TM;
@@ -278,6 +277,10 @@ namespace llvm {
/// use '\1' as the first character.
const char *StringConstantPrefix; // Defaults to ".str"
+ /// AllowQuotesInName - This is true if the assembler allows for complex
+ /// symbol names to be surrounded in quotes. This defaults to false.
+ bool AllowQuotesInName;
+
//===--- Data Emission Directives -------------------------------------===//
/// ZeroDirective - this should be set to the directive used to get some
@@ -308,8 +311,7 @@ namespace llvm {
/// directives for various sizes and non-default address spaces.
virtual const char *getASDirective(unsigned size,
unsigned AS) const {
- assert (AS > 0
- && "Dont know the directives for default addr space");
+ assert(AS > 0 && "Dont know the directives for default addr space");
return NULL;
}
@@ -472,10 +474,6 @@ namespace llvm {
/// encode inline subroutine information.
bool DwarfUsesInlineInfoSection; // Defaults to false.
- /// SupportsMacInfo - true if the Dwarf output supports macro information
- ///
- bool SupportsMacInfoSection; // Defaults to true
-
/// NonLocalEHFrameLabel - If set, the EH_frame label needs to be non-local.
///
bool NonLocalEHFrameLabel; // Defaults to false.
@@ -536,9 +534,9 @@ namespace llvm {
///
const char *DwarfRangesSection; // Defaults to ".debug_ranges".
- /// DwarfMacInfoSection - Section directive for Dwarf info.
+ /// DwarfMacroInfoSection - Section directive for DWARF macro info.
///
- const char *DwarfMacInfoSection; // Defaults to ".debug_macinfo".
+ const char *DwarfMacroInfoSection; // Defaults to ".debug_macinfo".
/// DwarfEHFrameSection - Section directive for Exception frames.
///
@@ -749,6 +747,9 @@ namespace llvm {
const char *getStringConstantPrefix() const {
return StringConstantPrefix;
}
+ bool doesAllowQuotesInName() const {
+ return AllowQuotesInName;
+ }
const char *getZeroDirective() const {
return ZeroDirective;
}
@@ -866,9 +867,6 @@ namespace llvm {
bool doesDwarfUsesInlineInfoSection() const {
return DwarfUsesInlineInfoSection;
}
- bool doesSupportMacInfoSection() const {
- return SupportsMacInfoSection;
- }
bool doesRequireNonLocalEHFrameLabel() const {
return NonLocalEHFrameLabel;
}
@@ -914,8 +912,8 @@ namespace llvm {
const char *getDwarfRangesSection() const {
return DwarfRangesSection;
}
- const char *getDwarfMacInfoSection() const {
- return DwarfMacInfoSection;
+ const char *getDwarfMacroInfoSection() const {
+ return DwarfMacroInfoSection;
}
const char *getDwarfEHFrameSection() const {
return DwarfEHFrameSection;
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 47dcc6c8e48f..40b0e7be480b 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -30,7 +30,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/DebugLoc.h"
+#include "llvm/Support/DebugLoc.h"
#include "llvm/Target/TargetMachine.h"
#include <climits>
#include <map>
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index a8db68c59789..33fc45161a6e 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -78,11 +78,13 @@ namespace CodeGenOpt {
};
}
+
+// Possible float ABI settings. Used with FloatABIType in TargetOptions.h.
namespace FloatABI {
enum ABIType {
- Default,
- Soft,
- Hard
+ Default, // Target-specific (either soft or hard, depending on triple etc).
+ Soft, // Soft float.
+ Hard // Hard float.
};
}
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 0218bfdb2ae3..91e8f80fd108 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -519,6 +519,36 @@ public:
return NULL;
}
+ /// getAllocationOrder - Returns the register allocation order for a specified
+ /// register class in the form of a pair of TargetRegisterClass iterators.
+ virtual std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+ }
+
+ /// ResolveRegAllocHint - Resolves the specified register allocation hint
+ /// to a physical register. Returns the physical register on success, or 0.
+ virtual unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const {
+ if (Type == 0 && Reg && isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+ }
+
+ /// UpdateRegAllocHint - A callback to allow the target a chance to update
+ /// register allocation hints when a register is "changed" (e.g. coalesced)
+ /// to another register. For example, on ARM some virtual registers should
+ /// target register pairs; if one half of a pair is coalesced to another
+ /// register, the allocation hint of the other half should be changed to
+ /// point to the new register.
+ virtual void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ // Do nothing.
+ }
+
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
/// time).
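A hypothetical target override of ResolveRegAllocHint, to make the hint-type
contract concrete (hint type 1 and the even/odd pairing are invented for
illustration):

  unsigned MyRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
                                          const MachineFunction &MF) const {
    if (Type == 1 && Reg && isPhysicalRegister(Reg))
      return Reg & ~1u;  // invented rule: prefer the even half of a pair
    // Fall back to the default handling of hint type 0.
    return TargetRegisterInfo::ResolveRegAllocHint(Type, Reg, MF);
  }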
diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h
new file mode 100644
index 000000000000..8544eed76be7
--- /dev/null
+++ b/include/llvm/Target/TargetSelect.h
@@ -0,0 +1,65 @@
+//===- TargetSelect.h - Target Selection & Registration -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utilities to make sure that certain classes of targets are
+// linked into the main application executable, and initialize them as
+// appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETSELECT_H
+#define LLVM_TARGET_TARGETSELECT_H
+
+#include "llvm/Config/config.h"
+
+namespace llvm {
+ // Declare all of the target-initialization functions that are available.
+#define LLVM_TARGET(TargetName) void Initialize##TargetName##Target();
+#include "llvm/Config/Targets.def"
+
+ // Declare all of the available asm-printer initialization functions, one
+ // per target configured with an asm printer.
+#define LLVM_ASM_PRINTER(TargetName) void Initialize##TargetName##AsmPrinter();
+#include "llvm/Config/AsmPrinters.def"
+
+ /// InitializeAllTargets - The main program should call this function if it
+ /// wants to link in all available targets that LLVM is configured to support.
+ inline void InitializeAllTargets() {
+#define LLVM_TARGET(TargetName) llvm::Initialize##TargetName##Target();
+#include "llvm/Config/Targets.def"
+ }
+
+ /// InitializeAllAsmPrinters - The main program should call this function if
+ /// it wants all asm printers that LLVM is configured to support. This will
+ /// cause them to be linked into its executable.
+ inline void InitializeAllAsmPrinters() {
+#define LLVM_ASM_PRINTER(TargetName) Initialize##TargetName##AsmPrinter();
+#include "llvm/Config/AsmPrinters.def"
+ }
+
+
+ /// InitializeNativeTarget - The main program should call this function to
+ /// initialize the native target corresponding to the host. This is useful
+ /// for JIT applications to ensure that the target gets linked in correctly.
+ inline bool InitializeNativeTarget() {
+ // If we have a native target, initialize it to ensure it is linked in.
+#ifdef LLVM_NATIVE_ARCH
+#define DoInit2(TARG, MOD) llvm::Initialize ## TARG ## MOD()
+#define DoInit(T, M) DoInit2(T, M)
+ DoInit(LLVM_NATIVE_ARCH, Target);
+ return false;
+#undef DoInit
+#undef DoInit2
+#else
+ return true;
+#endif
+ }
+}
+
+#endif
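For a JIT host, the expected use is as follows; note the inverted return value
(false means the native target was found and initialized):

  #include "llvm/Target/TargetSelect.h"

  int main() {
    if (llvm::InitializeNativeTarget()) {
      // true: no native target is configured into this build.
      return 1;
    }
    // ... create the ExecutionEngine, etc. ...
    return 0;
  }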
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 2c3fdd4a7884..971baeef9100 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -143,10 +143,10 @@ Pass *createLoopIndexSplitPass();
// this pass is:
//
// FROM CODE TO CODE
-// %X = alloca int, uint 1 ret int 42
-// store int 42, int *%X
-// %Y = load int* %X
-// ret int %Y
+// %X = alloca i32, i32 1 ret i32 42
+// store i32 42, i32 *%X
+// %Y = load i32* %X
+// ret i32 %Y
//
FunctionPass *createPromoteMemoryToRegisterPass();
extern const PassInfo *const PromoteMemoryToRegisterID;
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 5ea1a500c59e..7ab8721af76c 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -31,6 +31,16 @@ struct DbgInfoIntrinsic;
template<typename T> class SmallVectorImpl;
//===----------------------------------------------------------------------===//
+// Local analysis.
+//
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom);
+
+//===----------------------------------------------------------------------===//
// Local constant propagation.
//
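To ground the isSafeToLoadUnconditionally entry point declared above, a pass
speculating a load might do (Ptr and InsertPt are placeholders):

  if (isSafeToLoadUnconditionally(Ptr, InsertPt)) {
    // Known not to trap: the load may be hoisted to InsertPt.
    Value *V = new LoadInst(Ptr, "spec.load", InsertPt);
    // ... rewrite uses with V ...
  }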
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index c1732af09bdd..d439233d8c05 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -14,6 +14,7 @@
#include "llvm/AbstractTypeUser.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/System/Atomic.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/iterator.h"
#include <string>
@@ -102,7 +103,7 @@ private:
/// has no AbstractTypeUsers, the type is deleted. This is only sensible for
/// derived types.
///
- mutable unsigned RefCount;
+ mutable sys::cas_flag RefCount;
const Type *getForwardedTypeInternal() const;
@@ -268,19 +269,16 @@ public:
/// primitive type.
///
unsigned getPrimitiveSizeInBits() const;
-
+
+ /// getScalarSizeInBits - If this is a vector type, return the
+ /// getPrimitiveSizeInBits value for the element type. Otherwise return the
+ /// getPrimitiveSizeInBits value for this type.
+ unsigned getScalarSizeInBits() const;
+
/// getFPMantissaWidth - Return the width of the mantissa of this type. This
- /// is only valid on scalar floating point types. If the FP type does not
+ /// is only valid on floating point types. If the FP type does not
/// have a stable mantissa (e.g. ppc long double), this method returns -1.
- int getFPMantissaWidth() const {
- assert(isFloatingPoint() && "Not a floating point type!");
- if (ID == FloatTyID) return 24;
- if (ID == DoubleTyID) return 53;
- if (ID == X86_FP80TyID) return 64;
- if (ID == FP128TyID) return 113;
- assert(ID == PPC_FP128TyID && "unknown fp type");
- return -1;
- }
+ int getFPMantissaWidth() const;
/// getForwardedType - Return the type that this type has been resolved to if
/// it has been resolved to anything. This is used to implement the
@@ -296,6 +294,10 @@ public:
/// function.
const Type *getVAArgsPromotedType() const;
+ /// getScalarType - If this is a vector type, return the element type,
+ /// otherwise return this.
+ const Type *getScalarType() const;
+
//===--------------------------------------------------------------------===//
// Type Iteration support
//
@@ -336,7 +338,7 @@ public:
void addRef() const {
assert(isAbstract() && "Cannot add a reference to a non-abstract type!");
- ++RefCount;
+ sys::AtomicIncrement(&RefCount);
}
void dropRef() const {
@@ -345,17 +347,15 @@ public:
// If this is the last PATypeHolder using this object, and there are no
// PATypeHandles using it, the type is dead, delete it now.
- if (--RefCount == 0 && AbstractTypeUsers.empty())
+ sys::cas_flag OldCount = sys::AtomicDecrement(&RefCount);
+ if (OldCount == 0 && AbstractTypeUsers.empty())
this->destroy();
}
/// addAbstractTypeUser - Notify an abstract type that there is a new user of
/// it. This function is called primarily by the PATypeHandle class.
///
- void addAbstractTypeUser(AbstractTypeUser *U) const {
- assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
- AbstractTypeUsers.push_back(U);
- }
+ void addAbstractTypeUser(AbstractTypeUser *U) const;
/// removeAbstractTypeUser - Notify an abstract type that a user of the class
/// no longer has a handle to the type. This function is called primarily by
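The two new scalar helpers behave as follows (a sketch using the static
primitive types this tree provides):

  const Type *VTy = VectorType::get(Type::Int32Ty, 4);     // <4 x i32>
  const Type *Elt = VTy->getScalarType();                  // i32
  unsigned Bits   = VTy->getScalarSizeInBits();            // 32, not 128
  unsigned SBits  = Type::Int64Ty->getScalarSizeInBits();  // 64 (scalar case)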
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 261c635feb4a..5aa4d56c4e67 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -365,7 +365,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
if (TD && CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
- unsigned InWidth = Input->getType()->getPrimitiveSizeInBits();
+ unsigned InWidth = Input->getType()->getScalarSizeInBits();
if (TD->getPointerSizeInBits() < InWidth) {
Constant *Mask =
ConstantInt::get(APInt::getLowBitsSet(InWidth,
@@ -384,7 +384,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
if (TD &&
TD->getPointerSizeInBits() <=
- CE->getType()->getPrimitiveSizeInBits()) {
+ CE->getType()->getScalarSizeInBits()) {
if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *Input = CE->getOperand(0);
Constant *C = FoldBitCast(Input, DestTy, *TD);
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 6bdb64c975cc..adda5eeeb93a 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -352,7 +352,7 @@ Constant *DIFactory::GetStringConstant(const std::string &String) {
const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty);
- // If empty string then use a sbyte* null instead.
+ // If empty string then use an i8* null instead.
if (String.empty())
return Slot = ConstantPointerNull::get(DestTy);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 7af91304754d..6a53a83665c2 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -82,11 +82,8 @@ static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
/// outer loop of the current loop.
static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
SCEVHandle &Start, SCEVHandle &Stride,
- bool &isSigned,
ScalarEvolution *SE, DominatorTree *DT) {
SCEVHandle TheAddRec = Start; // Initialize to zero.
- bool isSExt = false;
- bool isZExt = false;
// If the outer level is an AddExpr, the operands are all start values except
// for a nested AddRecExpr.
@@ -101,13 +98,6 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
} else {
Start = SE->getAddExpr(Start, AE->getOperand(i));
}
-
- } else if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(SH)) {
- TheAddRec = Z->getOperand();
- isZExt = true;
- } else if (const SCEVSignExtendExpr *S = dyn_cast<SCEVSignExtendExpr>(SH)) {
- TheAddRec = S->getOperand();
- isSExt = true;
} else if (isa<SCEVAddRecExpr>(SH)) {
TheAddRec = SH;
} else {
@@ -120,9 +110,8 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
// Use getSCEVAtScope to attempt to simplify other loops out of
// the picture.
SCEVHandle AddRecStart = AddRec->getStart();
- SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
- if (!isa<SCEVCouldNotCompute>(BetterAddRecStart))
- AddRecStart = BetterAddRecStart;
+ AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
+ SCEVHandle AddRecStride = AddRec->getStepRecurrence(*SE);
// FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
// than an outer loop of the current loop, reject it. LSR has no concept of
@@ -131,24 +120,20 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
if (containsAddRecFromDifferentLoop(AddRecStart, L))
return false;
- if (isSExt || isZExt)
- Start = SE->getTruncateExpr(Start, AddRec->getType());
-
Start = SE->getAddExpr(Start, AddRecStart);
- if (!isa<SCEVConstant>(AddRec->getStepRecurrence(*SE))) {
- // If stride is an instruction, make sure it dominates the loop preheader.
- // Otherwise we could end up with a use before def situation.
+ // If stride is an instruction, make sure it dominates the loop preheader.
+ // Otherwise we could end up with a use before def situation.
+ if (!isa<SCEVConstant>(AddRecStride)) {
BasicBlock *Preheader = L->getLoopPreheader();
- if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT))
+ if (!AddRecStride->dominates(Preheader, DT))
return false;
DOUT << "[" << L->getHeader()->getName()
<< "] Variable stride: " << *AddRec << "\n";
}
- Stride = AddRec->getStepRecurrence(*SE);
- isSigned = isSExt;
+ Stride = AddRecStride;
return true;
}
@@ -218,9 +203,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
Loop *UseLoop = LI->getLoopFor(I->getParent());
SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
SCEVHandle Stride = Start;
- bool isSigned = false; // Arbitrary initial value - pacifies compiler.
- if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT))
+ if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
return false; // Non-reducible symbolic expression, bail out.
SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -271,11 +255,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
- StrideUses->addUser(NewStart, User, I, isSigned);
+ StrideUses->addUser(NewStart, User, I);
StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
} else {
- StrideUses->addUser(Start, User, I, isSigned);
+ StrideUses->addUser(Start, User, I);
}
}
}
@@ -312,7 +296,6 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse.
SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const {
- const Type *UseTy = U.getOperandValToReplace()->getType();
// Start with zero.
SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
// Create the basic add recurrence.
@@ -326,17 +309,9 @@ SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const {
// Evaluate the expression out of the loop, if possible.
if (!L->contains(U.getUser()->getParent())) {
SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
- if (!isa<SCEVCouldNotCompute>(ExitVal) && ExitVal->isLoopInvariant(L))
+ if (ExitVal->isLoopInvariant(L))
RetVal = ExitVal;
}
- // Promote the result to the type of the use.
- if (SE->getTypeSizeInBits(RetVal->getType()) !=
- SE->getTypeSizeInBits(UseTy)) {
- if (U.isSigned())
- RetVal = SE->getSignExtendExpr(RetVal, UseTy);
- else
- RetVal = SE->getZeroExtendExpr(RetVal, UseTy);
- }
return RetVal;
}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 98ab6f484ea1..68aa595aa8dd 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -68,6 +68,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CommandLine.h"
@@ -132,7 +133,8 @@ bool SCEV::isOne() const {
return false;
}
-SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(scCouldNotCompute) {}
+SCEVCouldNotCompute::SCEVCouldNotCompute(const ScalarEvolution* p) :
+ SCEV(scCouldNotCompute, p) {}
SCEVCouldNotCompute::~SCEVCouldNotCompute() {}
bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
@@ -178,7 +180,7 @@ SCEVConstant::~SCEVConstant() {
SCEVHandle ScalarEvolution::getConstant(ConstantInt *V) {
SCEVConstant *&R = (*SCEVConstants)[V];
- if (R == 0) R = new SCEVConstant(V);
+ if (R == 0) R = new SCEVConstant(V, this);
return R;
}
@@ -186,6 +188,11 @@ SCEVHandle ScalarEvolution::getConstant(const APInt& Val) {
return getConstant(ConstantInt::get(Val));
}
+SCEVHandle
+ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
+ return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned));
+}
+
const Type *SCEVConstant::getType() const { return V->getType(); }
void SCEVConstant::print(raw_ostream &OS) const {
@@ -193,8 +200,9 @@ void SCEVConstant::print(raw_ostream &OS) const {
}
SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy,
- const SCEVHandle &op, const Type *ty)
- : SCEV(SCEVTy), Op(op), Ty(ty) {}
+ const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p)
+ : SCEV(SCEVTy, p), Op(op), Ty(ty) {}
SCEVCastExpr::~SCEVCastExpr() {}
@@ -208,8 +216,9 @@ bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
SCEVTruncateExpr*> > SCEVTruncates;
-SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty)
- : SCEVCastExpr(scTruncate, op, ty) {
+SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p)
+ : SCEVCastExpr(scTruncate, op, ty, p) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate non-integer value!");
@@ -229,8 +238,9 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
SCEVZeroExtendExpr*> > SCEVZeroExtends;
-SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty)
- : SCEVCastExpr(scZeroExtend, op, ty) {
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p)
+ : SCEVCastExpr(scZeroExtend, op, ty, p) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot zero extend non-integer value!");
@@ -250,8 +260,9 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
SCEVSignExtendExpr*> > SCEVSignExtends;
-SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty)
- : SCEVCastExpr(scSignExtend, op, ty) {
+SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p)
+ : SCEVCastExpr(scSignExtend, op, ty, p) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot sign extend non-integer value!");
@@ -293,7 +304,7 @@ replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym,
SCEVHandle H =
getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
if (H != getOperand(i)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 8> NewOps;
NewOps.reserve(getNumOperands());
for (unsigned j = 0; j != i; ++j)
NewOps.push_back(getOperand(j));
@@ -373,7 +384,7 @@ replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym,
SCEVHandle H =
getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
if (H != getOperand(i)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 8> NewOps;
NewOps.reserve(getNumOperands());
for (unsigned j = 0; j != i; ++j)
NewOps.push_back(getOperand(j));
@@ -504,9 +515,18 @@ namespace {
return false;
}
- // Constant sorting doesn't matter since they'll be folded.
- if (isa<SCEVConstant>(LHS))
- return false;
+ // Compare constant values.
+ if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) {
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+ return LC->getValue()->getValue().ult(RC->getValue()->getValue());
+ }
+
+ // Compare addrec loop depths.
+ if (const SCEVAddRecExpr *LA = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+ if (LA->getLoop()->getLoopDepth() != RA->getLoop()->getLoopDepth())
+ return LA->getLoop()->getLoopDepth() < RA->getLoop()->getLoopDepth();
+ }
// Lexicographically compare n-ary expressions.
if (const SCEVNAryExpr *LC = dyn_cast<SCEVNAryExpr>(LHS)) {
@@ -558,7 +578,7 @@ namespace {
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
///
-static void GroupByComplexity(std::vector<SCEVHandle> &Ops,
+static void GroupByComplexity(SmallVectorImpl<SCEVHandle> &Ops,
LoopInfo *LI) {
if (Ops.size() < 2) return; // Noop
if (Ops.size() == 2) {
@@ -763,17 +783,16 @@ SCEVHandle ScalarEvolution::getTruncateExpr(const SCEVHandle &Op,
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
- // If the input value is a chrec scev made out of constants, truncate
- // all of the constants.
+ // If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
return getAddRecExpr(Operands, AddRec->getLoop());
}
SCEVTruncateExpr *&Result = (*SCEVTruncates)[std::make_pair(Op, Ty)];
- if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty);
+ if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty, this);
return Result;
}
@@ -861,7 +880,7 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op,
}
SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)];
- if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty);
+ if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty, this);
return Result;
}
@@ -933,7 +952,7 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op,
}
SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)];
- if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty);
+ if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty, this);
return Result;
}
@@ -979,9 +998,105 @@ SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op,
return ZExt;
}
+/// CollectAddOperandsWithScales - Process the given Ops list, which is
+/// a list of operands to be added under the given scale, and update the
+/// given map. This is a helper function for getAddExpr. As an example of
+/// what it does, given a sequence of operands that would form an add
+/// expression like this:
+///
+/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///
+/// where A and B are constants, update the map with these values:
+///
+/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
+///
+/// and add 13 + A*B*29 to AccumulatedConstant.
+/// This will allow getAddExpr to produce this:
+///
+/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
+///
+/// This form often exposes folding opportunities that are hidden in
+/// the original operand list.
+///
+/// Return true iff it appears that any interesting folding opportunities
+/// may be exposed. This helps getAddExpr short-circuit extra work in
+/// the common case where no interesting opportunities are present, and
+/// is also used as a check to avoid infinite recursion.
+///
+static bool
+CollectAddOperandsWithScales(DenseMap<SCEVHandle, APInt> &M,
+ SmallVector<SCEVHandle, 8> &NewOps,
+ APInt &AccumulatedConstant,
+ const SmallVectorImpl<SCEVHandle> &Ops,
+ const APInt &Scale,
+ ScalarEvolution &SE) {
+ bool Interesting = false;
+
+ // Iterate over the add operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
+ if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
+ APInt NewScale =
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+ if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
+ // A multiplication of a constant with another add; recurse.
+ Interesting |=
+ CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ cast<SCEVAddExpr>(Mul->getOperand(1))
+ ->getOperands(),
+ NewScale, SE);
+ } else {
+ // A multiplication of a constant with some other value. Update
+ // the map.
+ SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ SCEVHandle Key = SE.getMulExpr(MulOps);
+ std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Key, APInt()));
+ if (Pair.second) {
+ Pair.first->second = NewScale;
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += NewScale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ // Pull a buried constant out to the outside.
+ if (Scale != 1 || AccumulatedConstant != 0 || C->isZero())
+ Interesting = true;
+ AccumulatedConstant += Scale * C->getValue()->getValue();
+ } else {
+ // An ordinary operand. Update the map.
+ std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Ops[i], APInt()));
+ if (Pair.second) {
+ Pair.first->second = Scale;
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += Scale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ }
+
+ return Interesting;
+}
+
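To make the scale accumulation concrete, here is a minimal standalone C++ sketch (not part of the patch), with strings standing in for SCEVs and plain integers standing in for APInt scales; the names Ops, Coeff, and Interesting are illustrative only:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Model the coefficient collection: "m + 2*m + r + (-1*r) + n" is
    // reduced to one (term, scale) entry per distinct term, so merged or
    // zero coefficients become visible as folding opportunities.
    int main() {
      std::vector<std::pair<std::string, long long>> Ops =
          {{"m", 1}, {"m", 2}, {"r", 1}, {"r", -1}, {"n", 1}};
      std::map<std::string, long long> Coeff;
      bool Interesting = false;
      for (const auto &Op : Ops) {
        auto Ins = Coeff.insert(Op);
        if (!Ins.second) {            // Seen before: accumulate the scale.
          Ins.first->second += Op.second;
          Interesting = true;         // A merge happened; folding may pay off.
        }
      }
      for (const auto &KV : Coeff)    // m -> 3, n -> 1, r -> 0
        std::printf("%s: %lld\n", KV.first.c_str(), KV.second);
      std::printf("interesting: %s\n", Interesting ? "yes" : "no");
    }

The r term ends with coefficient 0 and can be dropped outright, which is exactly the kind of opportunity the Interesting flag reports.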
+namespace {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+}
+
/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
-SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
+SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) {
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1001,11 +1116,10 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() +
- RHSC->getValue()->getValue());
- Ops[0] = getConstant(Fold);
+ Ops[0] = getConstant(LHSC->getValue()->getValue() +
+ RHSC->getValue()->getValue());
+ if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
- if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
@@ -1043,7 +1157,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
const Type *DstType = Trunc->getType();
const Type *SrcType = Trunc->getOperand()->getType();
- std::vector<SCEVHandle> LargeOps;
+ SmallVector<SCEVHandle, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
// source type of the truncate.
@@ -1059,7 +1173,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
// is much more likely to be foldable here.
LargeOps.push_back(getSignExtendExpr(C, SrcType));
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
- std::vector<SCEVHandle> LargeMulOps;
+ SmallVector<SCEVHandle, 8> LargeMulOps;
for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
if (const SCEVTruncateExpr *T =
dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
@@ -1120,6 +1234,38 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
+ // Check to see if there are any folding opportunities present with
+ // operands multiplied by constant values.
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+ uint64_t BitWidth = getTypeSizeInBits(Ty);
+ DenseMap<SCEVHandle, APInt> M;
+ SmallVector<SCEVHandle, 8> NewOps;
+ APInt AccumulatedConstant(BitWidth, 0);
+ if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Ops, APInt(BitWidth, 1), *this)) {
+ // Some interesting folding opportunity is present, so it's worthwhile to
+ // re-generate the operands list. Group the operands by constant scale,
+ // to avoid multiplying by the same constant scale multiple times.
+ std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare> MulOpLists;
+ for (SmallVector<SCEVHandle, 8>::iterator I = NewOps.begin(),
+ E = NewOps.end(); I != E; ++I)
+ MulOpLists[M.find(*I)->second].push_back(*I);
+ // Re-generate the operands list.
+ Ops.clear();
+ if (AccumulatedConstant != 0)
+ Ops.push_back(getConstant(AccumulatedConstant));
+ for (std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare>::iterator I =
+ MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
+ if (I->first != 0)
+ Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second)));
+ if (Ops.empty())
+ return getIntegerSCEV(0, Ty);
+ if (Ops.size() == 1)
+ return Ops[0];
+ return getAddExpr(Ops);
+ }
+ }
+
// If we are adding something to a multiply expression, make sure the
// something is not already an operand of the multiply. If so, merge it into
// the multiply.
@@ -1128,13 +1274,13 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
- if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(MulOpSCEV)) {
+ if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
SCEVHandle InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
- std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end());
+ SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end());
MulOps.erase(MulOps.begin()+MulOp);
InnerMul = getMulExpr(MulOps);
}
@@ -1166,13 +1312,13 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
// Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
SCEVHandle InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
- std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end());
+ SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end());
MulOps.erase(MulOps.begin()+MulOp);
InnerMul1 = getMulExpr(MulOps);
}
SCEVHandle InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
- std::vector<SCEVHandle> MulOps(OtherMul->op_begin(),
+ SmallVector<SCEVHandle, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_end());
MulOps.erase(MulOps.begin()+OMulOp);
InnerMul2 = getMulExpr(MulOps);
@@ -1199,7 +1345,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this add and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
- std::vector<SCEVHandle> LIOps;
+ SmallVector<SCEVHandle, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
@@ -1213,7 +1359,8 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
// NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
LIOps.push_back(AddRec->getStart());
- std::vector<SCEVHandle> AddRecOps(AddRec->op_begin(), AddRec->op_end());
+ SmallVector<SCEVHandle, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
AddRecOps[0] = getAddExpr(LIOps);
SCEVHandle NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
@@ -1238,7 +1385,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (AddRec->getLoop() == OtherAddRec->getLoop()) {
// Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D}
- std::vector<SCEVHandle> NewOps(AddRec->op_begin(), AddRec->op_end());
+ SmallVector<SCEVHandle, 4> NewOps(AddRec->op_begin(), AddRec->op_end());
for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
if (i >= NewOps.size()) {
NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i,
@@ -1267,14 +1414,14 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scAddExpr,
SCEVOps)];
- if (Result == 0) Result = new SCEVAddExpr(Ops);
+ if (Result == 0) Result = new SCEVAddExpr(Ops, this);
return Result;
}
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
-SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
+SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) {
assert(!Ops.empty() && "Cannot get empty mul!");
#ifndef NDEBUG
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
@@ -1355,7 +1502,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this mul and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
- std::vector<SCEVHandle> LIOps;
+ SmallVector<SCEVHandle, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
@@ -1367,7 +1514,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
if (LIOps.size() == 1) {
const SCEV *Scale = LIOps[0];
@@ -1375,7 +1522,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
} else {
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
- std::vector<SCEVHandle> MulOps(LIOps);
+ SmallVector<SCEVHandle, 4> MulOps(LIOps.begin(), LIOps.end());
MulOps.push_back(AddRec->getOperand(i));
NewOps.push_back(getMulExpr(MulOps));
}
@@ -1433,7 +1580,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scMulExpr,
SCEVOps)];
if (Result == 0)
- Result = new SCEVMulExpr(Ops);
+ Result = new SCEVMulExpr(Ops, this);
return Result;
}
@@ -1473,14 +1620,14 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop())) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
return getAddRecExpr(Operands, AR->getLoop());
}
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
@@ -1489,7 +1636,9 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
SCEVHandle Op = M->getOperand(i);
SCEVHandle Div = getUDivExpr(Op, RHSC);
if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
- Operands = M->getOperands();
+ const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands();
+ Operands = SmallVector<SCEVHandle, 4>(MOperands.begin(),
+ MOperands.end());
Operands[i] = Div;
return getMulExpr(Operands);
}
@@ -1497,7 +1646,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
}
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
@@ -1522,7 +1671,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
}
SCEVUDivExpr *&Result = (*SCEVUDivs)[std::make_pair(LHS, RHS)];
- if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS);
+ if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS, this);
return Result;
}
@@ -1531,7 +1680,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
/// Simplify the expression as much as possible.
SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start,
const SCEVHandle &Step, const Loop *L) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
Operands.push_back(Start);
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
@@ -1546,7 +1695,7 @@ SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start,
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
-SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands,
+SCEVHandle ScalarEvolution::getAddRecExpr(SmallVectorImpl<SCEVHandle> &Operands,
const Loop *L) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
@@ -1565,8 +1714,8 @@ SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands,
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop* NestedLoop = NestedAR->getLoop();
if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
- std::vector<SCEVHandle> NestedOperands(NestedAR->op_begin(),
- NestedAR->op_end());
+ SmallVector<SCEVHandle, 4> NestedOperands(NestedAR->op_begin(),
+ NestedAR->op_end());
SCEVHandle NestedARHandle(NestedAR);
Operands[0] = NestedAR->getStart();
NestedOperands[0] = getAddRecExpr(Operands, L);
@@ -1576,19 +1725,20 @@ SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands,
std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end());
SCEVAddRecExpr *&Result = (*SCEVAddRecExprs)[std::make_pair(L, SCEVOps)];
- if (Result == 0) Result = new SCEVAddRecExpr(Operands, L);
+ if (Result == 0) Result = new SCEVAddRecExpr(Operands, L, this);
return Result;
}
SCEVHandle ScalarEvolution::getSMaxExpr(const SCEVHandle &LHS,
const SCEVHandle &RHS) {
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getSMaxExpr(Ops);
}
-SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) {
+SCEVHandle
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1662,19 +1812,20 @@ SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) {
std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scSMaxExpr,
SCEVOps)];
- if (Result == 0) Result = new SCEVSMaxExpr(Ops);
+ if (Result == 0) Result = new SCEVSMaxExpr(Ops, this);
return Result;
}
SCEVHandle ScalarEvolution::getUMaxExpr(const SCEVHandle &LHS,
const SCEVHandle &RHS) {
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getUMaxExpr(Ops);
}
-SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) {
+SCEVHandle
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) {
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1748,17 +1899,29 @@ SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) {
std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scUMaxExpr,
SCEVOps)];
- if (Result == 0) Result = new SCEVUMaxExpr(Ops);
+ if (Result == 0) Result = new SCEVUMaxExpr(Ops, this);
return Result;
}
+SCEVHandle ScalarEvolution::getSMinExpr(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ // ~smax(~x, ~y) == smin(x, y).
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+SCEVHandle ScalarEvolution::getUMinExpr(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ // ~umax(~x, ~y) == umin(x, y)
+ return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
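The complement identity behind these two helpers can be checked exhaustively at a small bit width; this standalone sketch (an illustration, not patch code) verifies ~max(~x, ~y) == min(x, y) for both the signed and unsigned orderings on i8:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Bitwise complement reverses both the signed and the unsigned order,
    // so it turns max into min: ~smax(~x, ~y) == smin(x, y), and likewise
    // ~umax(~x, ~y) == umin(x, y).
    int main() {
      for (int x = -128; x != 128; ++x)
        for (int y = -128; y != 128; ++y) {
          int8_t sx = (int8_t)x, sy = (int8_t)y;
          assert((int8_t)~std::max<int8_t>(~sx, ~sy) == std::min(sx, sy));
          uint8_t ux = (uint8_t)x, uy = (uint8_t)y;
          assert((uint8_t)~std::max<uint8_t>(~ux, ~uy) == std::min(ux, uy));
        }
      return 0; // All 65536 value pairs satisfy both identities.
    }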
SCEVHandle ScalarEvolution::getUnknown(Value *V) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
if (isa<ConstantPointerNull>(V))
return getIntegerSCEV(0, V->getType());
SCEVUnknown *&Result = (*SCEVUnknowns)[V];
- if (Result == 0) Result = new SCEVUnknown(V);
+ if (Result == 0) Result = new SCEVUnknown(V, this);
return Result;
}
@@ -1977,6 +2140,22 @@ ScalarEvolution::getTruncateOrNoop(const SCEVHandle &V, const Type *Ty) {
return getTruncateExpr(V, Ty);
}
+/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umax operation
+/// with them.
+SCEVHandle ScalarEvolution::getUMaxFromMismatchedTypes(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ SCEVHandle PromotedLHS = LHS;
+ SCEVHandle PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMaxExpr(PromotedLHS, PromotedRHS);
+}
+
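As a concrete illustration of the promotion rule (standalone, with made-up values), zero-extending the narrower operand first keeps the comparison unsigned and well-typed:

    #include <cstdint>
    #include <cstdio>

    // Model of getUMaxFromMismatchedTypes: zero-extend the narrower value
    // to the wider type, then take the unsigned max in that type.
    int main() {
      uint8_t Narrow = 0xF0;                 // 240 as an i8
      uint32_t Wide = 100;                   // 100 as an i32
      uint32_t Promoted = (uint32_t)Narrow;  // zext i8 -> i32
      uint32_t UMax = Promoted > Wide ? Promoted : Wide;
      std::printf("umax = %u\n", UMax);      // 240: the i8 wins once promoted
    }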
/// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for
/// the specified instruction and replaces any references to the symbolic value
/// SymName with the specified value. This is used during PHI resolution.
@@ -2040,7 +2219,7 @@ SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (FoundIndex != Add->getNumOperands()) {
// Create an add with everything but the specified operand.
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(Add->getOperand(i));
@@ -2143,73 +2322,134 @@ SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) {
/// guaranteed to end in (at every loop iteration). It is, at the same time,
/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
-static uint32_t GetMinTrailingZeros(SCEVHandle S, const ScalarEvolution &SE) {
+uint32_t
+ScalarEvolution::GetMinTrailingZeros(const SCEVHandle &S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getValue()->getValue().countTrailingZeros();
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
- return std::min(GetMinTrailingZeros(T->getOperand(), SE),
- (uint32_t)SE.getTypeSizeInBits(T->getType()));
+ return std::min(GetMinTrailingZeros(T->getOperand()),
+ (uint32_t)getTypeSizeInBits(T->getType()));
if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
- uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE);
- return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ?
- SE.getTypeSizeInBits(E->getType()) : OpRes;
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
}
if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
- uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE);
- return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ?
- SE.getTypeSizeInBits(E->getType()) : OpRes;
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
// The result is the min of all operands results.
- uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE);
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE));
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
// The result is the sum of all operands results.
- uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
- uint32_t BitWidth = SE.getTypeSizeInBits(M->getType());
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
+ uint32_t BitWidth = getTypeSizeInBits(M->getType());
for (unsigned i = 1, e = M->getNumOperands();
SumOpRes != BitWidth && i != e; ++i)
- SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i), SE),
+ SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
BitWidth);
return SumOpRes;
}
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
// The result is the min of all operands results.
- uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE);
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE));
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
// The result is the min of all operands results.
- uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE));
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
// The result is the min of all operands results.
- uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE));
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
- // SCEVUDivExpr, SCEVUnknown
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+ return Zeros.countTrailingOnes();
+ }
+
+ // SCEVUDivExpr
return 0;
}
+uint32_t
+ScalarEvolution::GetMinLeadingZeros(const SCEVHandle &S) {
+ // TODO: Handle other SCEV expression types here.
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return C->getValue()->getValue().countLeadingZeros();
+
+ if (const SCEVZeroExtendExpr *C = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ // A zero-extension cast adds zero bits.
+ return GetMinLeadingZeros(C->getOperand()) +
+ (getTypeSizeInBits(C->getType()) -
+ getTypeSizeInBits(C->getOperand()->getType()));
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+ return Zeros.countLeadingOnes();
+ }
+
+ return 1;
+}
+
+uint32_t
+ScalarEvolution::GetMinSignBits(const SCEVHandle &S) {
+ // TODO: Handle other SCEV expression types here.
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ const APInt &A = C->getValue()->getValue();
+ return A.isNegative() ? A.countLeadingOnes() :
+ A.countLeadingZeros();
+ }
+
+ if (const SCEVSignExtendExpr *C = dyn_cast<SCEVSignExtendExpr>(S)) {
+ // A sign-extension cast adds sign bits.
+ return GetMinSignBits(C->getOperand()) +
+ (getTypeSizeInBits(C->getType()) -
+ getTypeSizeInBits(C->getOperand()->getType()));
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ return ComputeNumSignBits(U->getValue(), TD);
+ }
+
+ return 1;
+}
+
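The bit-counting rules these three helpers rely on are easy to sanity-check on concrete integers. This standalone sketch (which assumes the GCC/Clang __builtin_ctz intrinsic) demonstrates the add and mul cases for trailing zeros:

    #include <cstdint>
    #include <cstdio>

    // tz(a + b) >= min(tz(a), tz(b)), and tz(a * b) >= tz(a) + tz(b):
    // the rules GetMinTrailingZeros applies to SCEVAddExpr and SCEVMulExpr.
    static unsigned tz(uint32_t v) { return v ? __builtin_ctz(v) : 32; }

    int main() {
      uint32_t A = 4, B = 8;   // like {4,+,8}: every value is 4 + k*8
      std::printf("add: at least %u trailing zeros\n",
                  tz(A) < tz(B) ? tz(A) : tz(B));   // 2, as documented
      uint32_t C = 12, D = 40; // tz(12) = 2, tz(40) = 3
      std::printf("mul: at least %u trailing zeros (actual %u)\n",
                  tz(C) + tz(D), tz(C * D));        // >= 5; actual 5
    }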
/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
///
@@ -2248,14 +2488,27 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (CI->isAllOnesValue())
return getSCEV(U->getOperand(0));
const APInt &A = CI->getValue();
- unsigned Ones = A.countTrailingOnes();
- if (APIntOps::isMask(Ones, A))
+
+ // Instcombine's ShrinkDemandedConstant may strip bits out of
+ // constants, obscuring what would otherwise be a low-bits mask.
+ // Use ComputeMaskedBits to recover what ShrinkDemandedConstant
+ // knew, and use it to reconstruct a low-bits mask value.
+ unsigned LZ = A.countLeadingZeros();
+ unsigned BitWidth = A.getBitWidth();
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+
+ APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
+
+ if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
return
getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
- IntegerType::get(Ones)),
+ IntegerType::get(BitWidth - LZ)),
U->getType());
}
break;
+
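The mask-reconstruction test is easier to see with concrete bits. In this standalone model (made-up values, plain integers in place of APInt), a mask that instcombine shrank from 0x0F to 0x0B is still recognized as a 4-bit low-bits mask once the operand's known-zero bits are taken into account:

    #include <cstdint>
    #include <cstdio>

    // Suppose "x & 0x0F" was shrunk to "x & 0x0B" because bit 2 of x is
    // provably zero. The 'and' still behaves as zext(trunc x to i4).
    int main() {
      uint8_t A = 0x0B;            // the shrunken mask constant
      uint8_t KnownZero = 0x04;    // bit 2 of the operand is known zero
      unsigned LZ = 4;             // countLeadingZeros(A) at 8 bits
      uint8_t EffectiveMask = (uint8_t)(0xFFu >> LZ);          // 0x0F
      bool LowBitsMask =
          LZ != 0 && ((uint8_t)(~A & ~KnownZero) & EffectiveMask) == 0;
      std::printf("acts as a low-bits mask: %s\n",
                  LowBitsMask ? "yes" : "no");                 // yes
    }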
case Instruction::Or:
// If the RHS of the Or is a constant, we may have something like:
// X*4+1 which got turned into X*4|1. Handle this as an Add so loop
@@ -2266,7 +2519,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
SCEVHandle LHS = getSCEV(U->getOperand(0));
const APInt &CIVal = CI->getValue();
- if (GetMinTrailingZeros(LHS, *this) >=
+ if (GetMinTrailingZeros(LHS) >=
(CIVal.getBitWidth() - CIVal.countLeadingZeros()))
return getAddExpr(LHS, getSCEV(U->getOperand(1)));
}
@@ -2292,9 +2545,27 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (BO->getOpcode() == Instruction::And &&
LCI->getValue() == CI->getValue())
if (const SCEVZeroExtendExpr *Z =
- dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0))))
- return getZeroExtendExpr(getNotSCEV(Z->getOperand()),
- U->getType());
+ dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
+ const Type *UTy = U->getType();
+ SCEVHandle Z0 = Z->getOperand();
+ const Type *Z0Ty = Z0->getType();
+ unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
+
+ // If C is a low-bits mask, the zero extend is serving to
+ // mask off the high bits. Complement the operand and
+ // re-apply the zext.
+ if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ return getZeroExtendExpr(getNotSCEV(Z0), UTy);
+
+ // If C is a single bit, it may be in the sign-bit position
+ // before the zero-extend. In this case, represent the xor
+ // using an add, which is equivalent, and re-apply the zext.
+ APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize);
+ if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+ Trunc.isSignBit())
+ return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+ UTy);
+ }
}
break;
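The sign-bit case of this rewrite can be verified exhaustively at i8; the standalone check below confirms that xor with the narrow type's sign bit and add of that same constant agree modulo 2^8, so the zext can be re-applied on top of either form:

    #include <cassert>
    #include <cstdint>

    // If C truncates to the i8 sign bit, then (zext z0) ^ C equals
    // zext (z0 + C): both just flip bit 7 of the low byte.
    int main() {
      const uint8_t SignBit = 0x80;
      for (unsigned z = 0; z != 256; ++z) {
        uint8_t Z0 = (uint8_t)z;
        uint32_t XorForm = (uint32_t)Z0 ^ (uint32_t)SignBit;
        uint32_t AddForm = (uint32_t)(uint8_t)(Z0 + SignBit);
        assert(XorForm == AddForm);
      }
      return 0; // Holds for all 256 values of z0.
    }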
@@ -2385,10 +2656,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
return getSMaxExpr(getSCEV(LHS), getSCEV(RHS));
else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
- // ~smax(~x, ~y) == smin(x, y).
- return getNotSCEV(getSMaxExpr(
- getNotSCEV(getSCEV(LHS)),
- getNotSCEV(getSCEV(RHS))));
+ return getSMinExpr(getSCEV(LHS), getSCEV(RHS));
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2399,9 +2667,25 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
return getUMaxExpr(getSCEV(LHS), getSCEV(RHS));
else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
- // ~umax(~x, ~y) == umin(x, y)
- return getNotSCEV(getUMaxExpr(getNotSCEV(getSCEV(LHS)),
- getNotSCEV(getSCEV(RHS))));
+ return getUMinExpr(getSCEV(LHS), getSCEV(RHS));
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n : 1 -> umax(n, 1)
+ if (LHS == U->getOperand(1) &&
+ isa<ConstantInt>(U->getOperand(2)) &&
+ cast<ConstantInt>(U->getOperand(2))->isOne() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero())
+ return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(2)));
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1 : n -> umax(n, 1)
+ if (LHS == U->getOperand(2) &&
+ isa<ConstantInt>(U->getOperand(1)) &&
+ cast<ConstantInt>(U->getOperand(1))->isOne() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero())
+ return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(1)));
break;
default:
break;
@@ -2462,9 +2746,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Update the value in the map.
Pair.first->second = ItCount;
- } else if (isa<PHINode>(L->getHeader()->begin())) {
- // Only count loops that have phi nodes as not being computable.
- ++NumTripCountsNotComputed;
+ } else {
+ if (ItCount.Max != CouldNotCompute)
+ // Update the value in the map.
+ Pair.first->second = ItCount;
+ if (isa<PHINode>(L->getHeader()->begin()))
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
}
// Now that we know more about the trip count for this loop, forget any
@@ -2520,19 +2808,58 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
- // If the loop has a non-one exit block count, we can't analyze it.
- BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock)
- return CouldNotCompute;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Examine all exits and pick the most conservative values.
+ SCEVHandle BECount = CouldNotCompute;
+ SCEVHandle MaxBECount = CouldNotCompute;
+ bool CouldNotComputeBECount = false;
+ bool CouldNotComputeMaxBECount = false;
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BackedgeTakenInfo NewBTI =
+ ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
+
+ if (NewBTI.Exact == CouldNotCompute) {
+ // We couldn't compute an exact value for this exit, so
+ // we won't be able to compute an exact value for the loop.
+ CouldNotComputeBECount = true;
+ BECount = CouldNotCompute;
+ } else if (!CouldNotComputeBECount) {
+ if (BECount == CouldNotCompute)
+ BECount = NewBTI.Exact;
+ else {
+ // TODO: More analysis could be done here. For example, a
+ // loop with a short-circuiting && operator has an exact count
+ // of the min of both sides.
+ CouldNotComputeBECount = true;
+ BECount = CouldNotCompute;
+ }
+ }
+ if (NewBTI.Max == CouldNotCompute) {
+ // We couldn't compute a maximum value for this exit, so
+ // we won't be able to compute a maximum value for the loop.
+ CouldNotComputeMaxBECount = true;
+ MaxBECount = CouldNotCompute;
+ } else if (!CouldNotComputeMaxBECount) {
+ if (MaxBECount == CouldNotCompute)
+ MaxBECount = NewBTI.Max;
+ else
+ MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max);
+ }
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+}
- // Okay, there is one exit block. Try to find the condition that causes the
- // loop to be exited.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return CouldNotCompute; // More than one block exiting!
+/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
+/// of the specified loop will execute if it exits via the specified block.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
+ BasicBlock *ExitingBlock) {
- // Okay, we've computed the exiting block. See what condition causes us to
- // exit.
+ // Okay, we've chosen an exiting block. See what condition causes us to
+ // exit at this block.
//
// FIXME: we should be able to handle switch instructions (with a single exit)
BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
@@ -2547,23 +2874,154 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// Currently we check for this by checking to see if the Exit branch goes to
// the loop header. If so, we know it will always execute the same number of
// times as the loop. We also handle the case where the exit block *is* the
- // loop header. This is common for un-rotated loops. More extensive analysis
- // could be done to handle more cases here.
+ // loop header. This is common for un-rotated loops.
+ //
+ // If both of those tests fail, walk up the unique predecessor chain to the
+ // header, stopping if there is an edge that doesn't exit the loop. If the
+ // header is reached, the execution count of the branch will be equal to the
+ // trip count of the loop.
+ //
+ // More extensive analysis could be done to handle more cases here.
+ //
if (ExitBr->getSuccessor(0) != L->getHeader() &&
ExitBr->getSuccessor(1) != L->getHeader() &&
- ExitBr->getParent() != L->getHeader())
- return CouldNotCompute;
-
- ICmpInst *ExitCond = dyn_cast<ICmpInst>(ExitBr->getCondition());
+ ExitBr->getParent() != L->getHeader()) {
+ // The simple checks failed; try climbing the unique predecessor chain
+ // up to the header.
+ bool Ok = false;
+ for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred)
+ return CouldNotCompute;
+ TerminatorInst *PredTerm = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+ if (PredSucc == BB)
+ continue;
+ // If the predecessor has a successor that isn't BB and isn't
+ // outside the loop, assume the worst.
+ if (L->contains(PredSucc))
+ return CouldNotCompute;
+ }
+ if (Pred == L->getHeader()) {
+ Ok = true;
+ break;
+ }
+ BB = Pred;
+ }
+ if (!Ok)
+ return CouldNotCompute;
+ }
+
+ // Proceed to the next level to examine the exit condition expression.
+ return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
+ ExitBr->getSuccessor(0),
+ ExitBr->getSuccessor(1));
+}
+
+/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+ // Check if the controlling expression for this loop is an and or or. In
+ // such cases, an exact backedge-taken count may be infeasible, but a
+ // maximum count may still be feasible.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+ if (BO->getOpcode() == Instruction::And) {
+ // Recurse on the operands of the and.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ SCEVHandle BECount = CouldNotCompute;
+ SCEVHandle MaxBECount = CouldNotCompute;
+ if (L->contains(TBB)) {
+ // Both conditions must be true for the loop to continue executing.
+ // Choose the less conservative count.
+ // TODO: Take the minimum of the exact counts.
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ // TODO: Take the minimum of the maximum counts.
+ if (BTI0.Max == CouldNotCompute)
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == CouldNotCompute)
+ MaxBECount = BTI0.Max;
+ else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max))
+ if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max))
+ MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(),
+ C1->getValue()->getValue()));
+ } else {
+ // Both conditions must be true for the loop to exit.
+ assert(L->contains(FBB) && "Loop block has no successor in loop!");
+ if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute)
+ BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute)
+ MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ if (BO->getOpcode() == Instruction::Or) {
+ // Recurse on the operands of the or.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ SCEVHandle BECount = CouldNotCompute;
+ SCEVHandle MaxBECount = CouldNotCompute;
+ if (L->contains(FBB)) {
+ // Both conditions must be false for the loop to continue executing.
+ // Choose the less conservative count.
+ // TODO: Take the minimum of the exact counts.
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ // TODO: Take the minimum of the maximum counts.
+ if (BTI0.Max == CouldNotCompute)
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == CouldNotCompute)
+ MaxBECount = BTI0.Max;
+ else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max))
+ if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max))
+ MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(),
+ C1->getValue()->getValue()));
+ } else {
+ // Both conditions must be false for the loop to exit.
+ assert(L->contains(TBB) && "Loop block has no successor in loop!");
+ if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute)
+ BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute)
+ MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ }
+
+ // With an icmp, it may be feasible to compute an exact backedge-taken count.
+ // Proceed to the next level to examine the icmp.
+ if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
+ return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
// If it's not an integer or pointer comparison then compute it the hard way.
- if (ExitCond == 0)
- return ComputeBackedgeTakenCountExhaustively(L, ExitBr->getCondition(),
- ExitBr->getSuccessor(0) == ExitBlock);
+ return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
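For intuition, here is the kind of source loop the new And handling is aimed at (a standalone example, not from the patch). The exact backedge count is governed by min(n, m), which the TODO above notes is not yet computed, while a constant bound from either side alone still caps the maximum count:

    #include <cstdio>

    // A loop whose continue condition is a short-circuiting &&: the body
    // executes min(n, m) times, so min(n, m) governs the exact count.
    int main() {
      unsigned n = 7, m = 5, Body = 0;
      for (unsigned i = 0; i != n && i != m; ++i)
        ++Body;
      std::printf("body executed %u times (min(7, 5) = 5)\n", Body);
    }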
+/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
- if (ExitBr->getSuccessor(1) == ExitBlock)
+ if (!L->contains(FBB))
Cond = ExitCond->getPredicate();
else
Cond = ExitCond->getInversePredicate();
@@ -2573,7 +3031,12 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
SCEVHandle ItCnt =
ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
- if (!isa<SCEVCouldNotCompute>(ItCnt)) return ItCnt;
+ if (!isa<SCEVCouldNotCompute>(ItCnt)) {
+ unsigned BitWidth = getTypeSizeInBits(ItCnt->getType());
+ return BackedgeTakenInfo(ItCnt,
+ isa<SCEVConstant>(ItCnt) ? ItCnt :
+ getConstant(APInt::getMaxValue(BitWidth)-1));
+ }
}
SCEVHandle LHS = getSCEV(ExitCond->getOperand(0));
@@ -2651,8 +3114,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
break;
}
return
- ComputeBackedgeTakenCountExhaustively(L, ExitCond,
- ExitBr->getSuccessor(0) == ExitBlock);
+ ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}
static ConstantInt *
@@ -2750,7 +3212,7 @@ ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS,
unsigned MaxSteps = MaxBruteForceIterations;
for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
ConstantInt *ItCst =
- ConstantInt::get(IdxExpr->getType(), IterationNum);
+ ConstantInt::get(cast<IntegerType>(IdxExpr->getType()), IterationNum);
ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
// Form the GEP offset.
@@ -2945,7 +3407,7 @@ ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen)
if (CondVal->getValue() == uint64_t(ExitWhen)) {
ConstantEvolutionLoopExitValue[PN] = PHIVal;
++NumBruteForceTripCountsComputed;
- return getConstant(ConstantInt::get(Type::Int32Ty, IterationNum));
+ return getConstant(Type::Int32Ty, IterationNum);
}
// Compute the value of the PHI node for the next iteration.
@@ -3074,7 +3536,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
if (OpAtScope != Comm->getOperand(i)) {
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
- std::vector<SCEVHandle> NewOps(Comm->op_begin(), Comm->op_begin()+i);
+ SmallVector<SCEVHandle, 8> NewOps(Comm->op_begin(), Comm->op_begin()+i);
NewOps.push_back(OpAtScope);
for (++i; i != e; ++i) {
@@ -3394,6 +3856,29 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
return 0;
}
+/// HasSameValue - SCEV structural equivalence is usually sufficient for
+/// testing whether two expressions are equal; however, for the purposes of
+/// looking for a condition guarding a loop, it can be useful to be a little
+/// more general, since a front-end may have replicated the controlling
+/// expression.
+///
+static bool HasSameValue(const SCEVHandle &A, const SCEVHandle &B) {
+ // Quick check to see if they are the same SCEV.
+ if (A == B) return true;
+
+ // Otherwise, if they're both SCEVUnknown, it's possible that they hold
+ // two different instructions with the same value. Check for this case.
+ if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
+ if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
+ if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
+ if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
+ if (AI->isIdenticalTo(BI))
+ return true;
+
+ // Otherwise assume they may have a different value.
+ return false;
+}
+
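The second test is the interesting one. This standalone model (with a toy Inst type standing in for llvm::Instruction) shows two distinct objects that fail the pointer-equality quick check yet compare equal operation-for-operation:

    #include <cstdio>

    // Toy stand-in for Instruction::isIdenticalTo: same opcode and same
    // operands, even though the two objects are distinct.
    struct Inst {
      int Opcode, LHS, RHS;
      bool isIdenticalTo(const Inst &O) const {
        return Opcode == O.Opcode && LHS == O.LHS && RHS == O.RHS;
      }
    };

    int main() {
      Inst A{0, 1, 2};
      Inst B{0, 1, 2};  // replicated, e.g. by a front-end
      std::printf("same object: %s\n", &A == &B ? "yes" : "no");           // no
      std::printf("same value:  %s\n", A.isIdenticalTo(B) ? "yes" : "no"); // yes
    }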
/// isLoopGuardedByCond - Test whether entry to the loop is protected by
/// a conditional between LHS and RHS. This is used to help avoid max
/// expressions in loop trip counts.
@@ -3494,15 +3979,43 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
SCEVHandle PreCondLHSSCEV = getSCEV(PreCondLHS);
SCEVHandle PreCondRHSSCEV = getSCEV(PreCondRHS);
- if ((LHS == PreCondLHSSCEV && RHS == PreCondRHSSCEV) ||
- (LHS == getNotSCEV(PreCondRHSSCEV) &&
- RHS == getNotSCEV(PreCondLHSSCEV)))
+ if ((HasSameValue(LHS, PreCondLHSSCEV) &&
+ HasSameValue(RHS, PreCondRHSSCEV)) ||
+ (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
+ HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))))
return true;
}
return false;
}
+/// getBECount - Subtract the end and start values and divide by the step,
+/// rounding up, to get the number of times the backedge is executed. Return
+/// CouldNotCompute if an intermediate computation overflows.
+SCEVHandle ScalarEvolution::getBECount(const SCEVHandle &Start,
+ const SCEVHandle &End,
+ const SCEVHandle &Step) {
+ const Type *Ty = Start->getType();
+ SCEVHandle NegOne = getIntegerSCEV(-1, Ty);
+ SCEVHandle Diff = getMinusSCEV(End, Start);
+ SCEVHandle RoundUp = getAddExpr(Step, NegOne);
+
+ // Add an adjustment to the difference between End and Start so that
+ // the division will effectively round up.
+ SCEVHandle Add = getAddExpr(Diff, RoundUp);
+
+ // Check Add for unsigned overflow.
+ // TODO: More sophisticated things could be done here.
+ const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1);
+ SCEVHandle OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Diff, WideTy),
+ getZeroExtendExpr(RoundUp, WideTy));
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return CouldNotCompute;
+
+ return getUDivExpr(Add, Step);
+}
+
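The rounding and the overflow check combine as in this standalone i8 model (made-up values): the round-up add wraps in the narrow type but not in the widened one, and the mismatch is what signals CouldNotCompute:

    #include <cstdint>
    #include <cstdio>

    // Model of getBECount: ceil((End - Start) / Step) computed as
    // (Diff + (Step - 1)) / Step, with the add re-done one bit wider to
    // detect unsigned overflow.
    int main() {
      uint8_t Start = 3, End = 250, Step = 10;
      uint8_t Diff = End - Start;                            // 247
      uint8_t RoundUp = Step - 1;                            // 9
      uint8_t Add = Diff + RoundUp;                          // 256 wraps to 0
      uint16_t WideAdd = (uint16_t)Diff + (uint16_t)RoundUp; // 256, exact
      if ((uint16_t)Add != WideAdd) {
        std::printf("overflow detected: CouldNotCompute\n");
        return 0;
      }
      std::printf("backedge count = %u\n", Add / Step);
    }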
/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
@@ -3520,7 +4033,6 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// FORNOW: We only support unit strides.
unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
SCEVHandle Step = AddRec->getStepRecurrence(*this);
- SCEVHandle NegOne = getIntegerSCEV(-1, AddRec->getType());
// TODO: handle non-constant strides.
const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
@@ -3575,22 +4087,20 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
: getUMaxExpr(RHS, Start);
// Determine the maximum constant end value.
- SCEVHandle MaxEnd = isa<SCEVConstant>(End) ? End :
- getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) :
- APInt::getMaxValue(BitWidth));
+ SCEVHandle MaxEnd =
+ isa<SCEVConstant>(End) ? End :
+ getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth)
+ .ashr(GetMinSignBits(End) - 1) :
+ APInt::getMaxValue(BitWidth)
+ .lshr(GetMinLeadingZeros(End)));
// Finally, we subtract these two values and divide, rounding up, to get
// the number of times the backedge is executed.
- SCEVHandle BECount = getUDivExpr(getAddExpr(getMinusSCEV(End, Start),
- getAddExpr(Step, NegOne)),
- Step);
+ SCEVHandle BECount = getBECount(Start, End, Step);
// The maximum backedge count is similar, except using the minimum start
// value and the maximum end value.
- SCEVHandle MaxBECount = getUDivExpr(getAddExpr(getMinusSCEV(MaxEnd,
- MinStart),
- getAddExpr(Step, NegOne)),
- Step);
+ SCEVHandle MaxBECount = getBECount(MinStart, MaxEnd, Step);
return BackedgeTakenInfo(BECount, MaxBECount);
}
@@ -3611,7 +4121,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If the start is a non-zero constant, shift the range to simplify things.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
- std::vector<SCEVHandle> Operands(op_begin(), op_end());
+ SmallVector<SCEVHandle, 4> Operands(op_begin(), op_end());
Operands[0] = SE.getIntegerSCEV(0, SC->getType());
SCEVHandle Shifted = SE.getAddRecExpr(Operands, getLoop());
if (const SCEVAddRecExpr *ShiftedAddRec =
@@ -3636,7 +4146,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// iteration exits.
unsigned BitWidth = SE.getTypeSizeInBits(getType());
if (!Range.contains(APInt(BitWidth, 0)))
- return SE.getConstant(ConstantInt::get(getType(),0));
+ return SE.getIntegerSCEV(0, getType());
if (isAffine()) {
// If this is an affine expression then we have this situation:
@@ -3672,7 +4182,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// quadratic equation to solve it. To do this, we must frame our problem in
// terms of figuring out when zero is crossed, instead of when
// Range.getUpper() is crossed.
- std::vector<SCEVHandle> NewOps(op_begin(), op_end());
+ SmallVector<SCEVHandle, 4> NewOps(op_begin(), op_end());
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
SCEVHandle NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
@@ -3783,7 +4293,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute()) {
+ : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute(0)) {
}
bool ScalarEvolution::runOnFunction(Function &F) {
@@ -3847,11 +4357,18 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
OS << " --> ";
SCEVHandle SV = SE.getSCEV(&*I);
SV->print(OS);
- OS << "\t\t";
- if (const Loop *L = LI->getLoopFor((*I).getParent())) {
- OS << "Exits: ";
- SCEVHandle ExitValue = SE.getSCEVAtScope(&*I, L->getParentLoop());
+ const Loop *L = LI->getLoopFor((*I).getParent());
+
+ SCEVHandle AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ SCEVHandle ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
if (!ExitValue->isLoopInvariant(L)) {
OS << "<<Unknown>>";
} else {
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e1f8fa421f5a..2a73c27405a8 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -182,7 +182,8 @@ static bool FactorOutConstant(SCEVHandle &S,
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
if (!C->getValue()->getValue().srem(Factor)) {
- std::vector<SCEVHandle> NewMulOps(M->getOperands());
+ const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands();
+ SmallVector<SCEVHandle, 4> NewMulOps(MOperands.begin(), MOperands.end());
NewMulOps[0] =
SE.getConstant(C->getValue()->getValue().sdiv(Factor));
S = SE.getMulExpr(NewMulOps);
@@ -239,7 +240,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
Value *V) {
const Type *ElTy = PTy->getElementType();
SmallVector<Value *, 4> GepIndices;
- std::vector<SCEVHandle> Ops(op_begin, op_end);
+ SmallVector<SCEVHandle, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
// Descend down the pointer's type and attempt to convert the other
@@ -250,8 +251,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
for (;;) {
APInt ElSize = APInt(SE.getTypeSizeInBits(Ty),
ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0);
- std::vector<SCEVHandle> NewOps;
- std::vector<SCEVHandle> ScaledOps;
+ SmallVector<SCEVHandle, 8> NewOps;
+ SmallVector<SCEVHandle, 8> ScaledOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
// Split AddRecs up into parts as either of the parts may be usable
// without the other.
@@ -297,9 +298,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx));
ElTy = STy->getTypeAtIndex(ElIdx);
Ops[0] =
- SE.getConstant(ConstantInt::get(Ty,
- FullOffset -
- SL.getElementOffset(ElIdx)));
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
AnyNonZeroIndices = true;
continue;
}
@@ -365,7 +364,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// comments on expandAddToGEP for details.
if (SE.TD)
if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
- const std::vector<SCEVHandle> &Ops = S->getOperands();
+ const SmallVectorImpl<SCEVHandle> &Ops = S->getOperands();
return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1],
PTy, Ty, V);
}
@@ -432,7 +431,7 @@ static void ExposePointerBase(SCEVHandle &Base, SCEVHandle &Rest,
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
Base = A->getOperand(A->getNumOperands()-1);
- std::vector<SCEVHandle> NewAddOps(A->op_begin(), A->op_end());
+ SmallVector<SCEVHandle, 8> NewAddOps(A->op_begin(), A->op_end());
NewAddOps.back() = Rest;
Rest = SE.getAddExpr(NewAddOps);
ExposePointerBase(Base, Rest, SE);
@@ -473,7 +472,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
- std::vector<SCEVHandle> NewOps(S->getOperands());
+ const SmallVectorImpl<SCEVHandle> &SOperands = S->getOperands();
+ SmallVector<SCEVHandle, 4> NewOps(SOperands.begin(), SOperands.end());
NewOps[0] = SE.getIntegerSCEV(0, Ty);
SCEVHandle Rest = SE.getAddRecExpr(NewOps, L);
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 45f97b8f64b1..17ffa2d2de6b 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -52,11 +52,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = Mask.getBitWidth();
- assert((V->getType()->isInteger() || isa<PointerType>(V->getType())) &&
+ assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) &&
"Not integer or pointer type!");
- assert((!TD || TD->getTypeSizeInBits(V->getType()) == BitWidth) &&
- (!isa<IntegerType>(V->getType()) ||
- V->getType()->getPrimitiveSizeInBits() == BitWidth) &&
+ assert((!TD ||
+ TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ (!V->getType()->isIntOrIntVector() ||
+ V->getType()->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
"V, Mask, KnownOne and KnownZero should have same BitWidth");
@@ -67,12 +68,26 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownZero = ~KnownOne & Mask;
return;
}
- // Null is all-zeros.
- if (isa<ConstantPointerNull>(V)) {
+ // Null and aggregate-zero are all-zeros.
+ if (isa<ConstantPointerNull>(V) ||
+ isa<ConstantAggregateZero>(V)) {
KnownOne.clear();
KnownZero = Mask;
return;
}
+ // Handle a constant vector by taking the intersection of the known bits of
+ // each element.
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ KnownZero.set(); KnownOne.set();
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
+ TD, Depth);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ }
+ return;
+ }
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
@@ -218,7 +233,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
const Type *SrcTy = I->getOperand(0)->getType();
unsigned SrcBitWidth = TD ?
TD->getTypeSizeInBits(SrcTy) :
- SrcTy->getPrimitiveSizeInBits();
+ SrcTy->getScalarSizeInBits();
APInt MaskIn(Mask);
MaskIn.zextOrTrunc(SrcBitWidth);
KnownZero.zextOrTrunc(SrcBitWidth);
@@ -480,7 +495,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Handle array index arithmetic.
const Type *IndexedTy = GTI.getIndexedType();
if (!IndexedTy->isSized()) return;
- unsigned GEPOpiBits = Index->getType()->getPrimitiveSizeInBits();
+ unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
LocalMask = APInt::getAllOnesValue(GEPOpiBits);
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
@@ -609,8 +624,8 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
/// 'Op' must have a scalar integer type.
///
unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) {
- const IntegerType *Ty = cast<IntegerType>(V->getType());
- unsigned TyBits = Ty->getBitWidth();
+ const Type *Ty = V->getType();
+ unsigned TyBits = Ty->getScalarSizeInBits();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
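
The constant-vector case above is the heart of this change: the known bits of a whole vector are the bits every element agrees on. A self-contained sketch of the same intersection on plain 64-bit masks (APInt-free, so the names and widths here are illustrative only):

    #include <stdint.h>
    #include <stddef.h>

    // For a constant element, known-zero is the complement of its value and
    // known-one is the value itself. Across the vector, a bit is "known"
    // only if every element agrees, so the masks are combined with AND.
    static void intersectKnownBits(const uint64_t *Elts, size_t N,
                                   uint64_t &KnownZero, uint64_t &KnownOne) {
      KnownZero = ~0ULL; // start from "all bits known zero"
      KnownOne  = ~0ULL; // and "all bits known one"; AND trims both
      for (size_t i = 0; i != N; ++i) {
        KnownZero &= ~Elts[i];
        KnownOne  &=  Elts[i];
      }
    }
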
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index c5190efc4dac..b3f7cdb3c379 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -526,6 +526,10 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(coldcc);
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
+ KEYWORD(arm_apcscc);
+ KEYWORD(arm_aapcscc);
+ KEYWORD(arm_aapcs_vfpcc);
+
KEYWORD(cc);
KEYWORD(c);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 4863f3c5116a..909370cb669d 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -808,8 +808,11 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
/// ::= 'coldcc'
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
+/// ::= 'arm_apcscc'
+/// ::= 'arm_aapcscc'
+/// ::= 'arm_aapcs_vfpcc'
/// ::= 'cc' UINT
-///
+///
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
switch (Lex.getKind()) {
default: CC = CallingConv::C; return false;
@@ -818,6 +821,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_coldcc: CC = CallingConv::Cold; break;
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
+ case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
+ case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
case lltok::kw_cc: Lex.Lex(); return ParseUInt32(CC);
}
Lex.Lex();
@@ -1743,7 +1749,7 @@ bool LLParser::ParseValID(ValID &ID) {
Lex.Lex();
if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
ParseGlobalTypeAndValue(SrcVal) ||
- ParseToken(lltok::kw_to, "expected 'to' int constantexpr cast") ||
+ ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") ||
ParseType(DestTy) ||
ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
return true;
@@ -3145,7 +3151,7 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) {
}
/// ParseLoad
-/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' uint)?
+/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' i32)?
bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
bool isVolatile) {
Value *Val; LocTy Loc;
@@ -3163,7 +3169,7 @@ bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
}
/// ParseStore
-/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' uint)?
+/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
bool isVolatile) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
@@ -3186,7 +3192,7 @@ bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
}
/// ParseGetResult
-/// ::= 'getresult' TypeAndValue ',' uint
+/// ::= 'getresult' TypeAndValue ',' i32
/// FIXME: Remove support for getresult in LLVM 3.0
bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy ValLoc, EltLoc;
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 9335d19612a5..cff89f8e472f 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -60,7 +60,9 @@ namespace lltok {
kw_gc,
kw_c,
- kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_x86_stdcallcc, kw_x86_fastcallcc,
+ kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc,
+ kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_signext,
kw_zeroext,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 3b44f56421f4..6b9606c5d14b 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2040,14 +2040,13 @@ void BitcodeReader::dematerializeFunction(Function *F) {
Module *BitcodeReader::materializeModule(std::string *ErrInfo) {
- for (DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator I =
- DeferredFunctionInfo.begin(), E = DeferredFunctionInfo.end(); I != E;
- ++I) {
- Function *F = I->first;
+ // Iterate over the module, deserializing any functions that are still on
+ // disk.
+ for (Module::iterator F = TheModule->begin(), E = TheModule->end();
+ F != E; ++F)
if (F->hasNotBeenReadFromBitcode() &&
materializeFunction(F, ErrInfo))
return 0;
- }
// Upgrade any intrinsic calls that slipped through (should not happen!) and
// delete the old functions to clean up. We can't do this unless the entire
@@ -2123,7 +2122,7 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, std::string *ErrMsg){
// is run.
if (M)
M = R->releaseModule(ErrMsg);
-
+
delete R;
return M;
}
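
materializeModule now drives deserialization from the module's own function list rather than from the deferred-info map. A simplified sketch of that control flow; the types and the materialize interface below are assumptions for illustration, not the reader's real API:

    #include <cstddef>
    #include <string>
    #include <vector>

    struct Function {
      bool OnDisk;
      // Deserialize the body; returns true on failure, described in *Err.
      bool materialize(std::string *Err) { OnDisk = false; return false; }
    };

    struct Module { std::vector<Function*> Functions; };

    // Walk every function and read in the ones whose bodies are still on
    // disk, stopping at the first failure.
    static Module *materializeAll(Module *M, std::string *ErrInfo) {
      for (size_t i = 0, e = M->Functions.size(); i != e; ++i)
        if (M->Functions[i]->OnDisk &&
            M->Functions[i]->materialize(ErrInfo))
          return 0;
      return M;
    }
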
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 45462da0d26a..e93190462347 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -152,6 +152,9 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
bool AsmPrinter::doInitialization(Module &M) {
Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix());
+ if (TAI->doesAllowQuotesInName())
+ Mang->setUseQuotes(true);
+
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
@@ -174,9 +177,17 @@ bool AsmPrinter::doInitialization(Module &M) {
SwitchToDataSection(""); // Reset back to no section.
- MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- if (MMI) MMI->AnalyzeModule(M);
- DW = getAnalysisIfAvailable<DwarfWriter>();
+ if (TAI->doesSupportDebugInformation()
+ || TAI->doesSupportExceptionHandling()) {
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ if (MMI) {
+ MMI->AnalyzeModule(M);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ if (DW)
+ DW->BeginModule(&M, MMI, O, this, TAI);
+ }
+ }
+
return false;
}
@@ -347,8 +358,9 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
const char* JumpTableDataSection = TAI->getJumpTableDataSection();
const Function *F = MF.getFunction();
unsigned SectionFlags = TAI->SectionFlagsForGlobal(F);
+ bool JTInDiffSection = false;
if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) ||
- !JumpTableDataSection ||
+ !JumpTableDataSection ||
SectionFlags & SectionFlags::Linkonce) {
// In PIC mode, we need to emit the jump table to the same section as the
// function body itself, otherwise the label differences won't make sense.
@@ -357,6 +369,7 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
SwitchToSection(TAI->SectionForGlobal(F));
} else {
SwitchToDataSection(JumpTableDataSection);
+ JTInDiffSection = true;
}
EmitAlignment(Log2_32(MJTI->getAlignment()));
@@ -380,8 +393,10 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
// before each jump table. The first label is never referenced, but tells
// the assembler and linker the extents of the jump table object. The
// second label is actually referenced by the code.
- if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
- O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+ if (JTInDiffSection) {
+ if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
+ O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+ }
O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << i << ":\n";
@@ -502,7 +517,7 @@ const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) {
void AsmPrinter::EmitLLVMUsedList(Constant *List) {
const char *Directive = TAI->getUsedDirective();
- // Should be an array of 'sbyte*'.
+ // Should be an array of 'i8*'.
ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (InitList == 0) return;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c773378bb015..9d340e3aee58 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1757,6 +1757,9 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+ CompileUnit *Unit = MainCU;
+ if (!Unit)
+ Unit = &FindCompileUnit(SP.getCompileUnit());
GlobalVariable *GV = SP.getGV();
DenseMap<const GlobalVariable *, DbgScope *>::iterator
II = AbstractInstanceRootMap.find(GV);
@@ -1767,7 +1770,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV));
// Get the compile unit context.
- CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
DIE *SPDie = Unit->getDieMapSlotFor(GV);
if (!SPDie)
SPDie = CreateSubprogramDIE(Unit, SP, false, true);
@@ -1789,7 +1791,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
// Create a concrete inlined instance for this inlined function.
DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV));
DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
- CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
ScopeDie->setAbstractCompileUnit(Unit);
DIE *Origin = Unit->getDieMapSlotFor(GV);
@@ -1850,7 +1851,14 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
}
SmallVector<DbgScope *, 8> &Scopes = I->second;
- assert(!Scopes.empty() && "We should have at least one debug scope!");
+ if (Scopes.empty()) {
+ // Returned ID is 0 if this is unbalanced "end of inlined
+ // scope". This could happen if optimizer eats dbg intrinsics
+ // or "beginning of inlined scope" is not recoginized due to
+ // missing location info. In such cases, ignore this region.end.
+ return 0;
+ }
+
DbgScope *Scope = Scopes.back(); Scopes.pop_back();
unsigned ID = MMI->NextLabelID();
MMI->RecordUsedDbgLabel(ID);
@@ -1987,8 +1995,8 @@ void DwarfDebug::EmitInitial() {
Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
EmitLabel("section_aranges", 0);
- if (TAI->doesSupportMacInfoSection()) {
- Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
+ Asm->SwitchToDataSection(LineInfoDirective);
EmitLabel("section_macinfo", 0);
}
@@ -2534,9 +2542,9 @@ void DwarfDebug::EmitDebugRanges() {
/// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
///
void DwarfDebug::EmitDebugMacInfo() {
- if (TAI->doesSupportMacInfoSection()) {
+ if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
// Start the dwarf macinfo section.
- Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ Asm->SwitchToDataSection(LineInfoDirective);
Asm->EOL();
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index f7ca4f4c9045..a1b97df82afc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -190,7 +190,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
Asm->EmitULEB128Bytes(Offset);
Asm->EOL("Offset");
} else {
- assert(0 && "Machine move no supported yet.");
+ assert(0 && "Machine move not supported yet.");
}
} else if (Src.isReg() &&
Src.getReg() == MachineLocation::VirtualFP) {
@@ -200,7 +200,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
Asm->EOL("Register");
} else {
- assert(0 && "Machine move no supported yet.");
+ assert(0 && "Machine move not supported yet.");
}
} else {
unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1d0887f843d8..4d5c3c2c7dc7 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -547,7 +547,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
// fallthrough.
if (!BBI.FalseBB)
BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
- assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
}
// Then scan all the instructions.
@@ -663,6 +667,13 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
return BBI;
}
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
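
Both hunks apply the same hardening: where the old code asserted that a conditional branch has a distinct fallthrough block, the new code records the block as unpredicable (or finishes the analysis early) and moves on. A generic sketch of the pattern, with a reduced BBInfo assumed for illustration:

    struct BBInfo {
      bool HasFalseBlock;  // distinct fallthrough successor found?
      bool IsUnpredicable;
      bool IsAnalyzed;
    };

    // Instead of asserting on a malformed conditional branch (e.g. true and
    // false successors being the same block), mark the block as not a
    // candidate and let the caller skip it gracefully.
    static void scanBlock(BBInfo &BBI) {
      if (!BBI.HasFalseBlock) {
        BBI.IsUnpredicable = true;
        return;
      }
      // ... the normal instruction scan would continue here ...
      BBI.IsAnalyzed = true;
    }
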
diff --git a/lib/CodeGen/LazyLiveness.cpp b/lib/CodeGen/LazyLiveness.cpp
index 6fb35d235a7f..a951c99ddb7a 100644
--- a/lib/CodeGen/LazyLiveness.cpp
+++ b/lib/CodeGen/LazyLiveness.cpp
@@ -32,10 +32,12 @@ void LazyLiveness::computeBackedgeChain(MachineFunction& mf,
calculated.set(preorder[MBB]);
for (SparseBitVector<128>::iterator I = tmp.begin(); I != tmp.end(); ++I) {
+ assert(rev_preorder.size() > *I && "Unknown block!");
+
MachineBasicBlock* SrcMBB = rev_preorder[*I];
- for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin();
- SI != SrcMBB->succ_end(); ++SI) {
+ for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin(),
+ SE = SrcMBB->succ_end(); SI != SE; ++SI) {
MachineBasicBlock* TgtMBB = *SI;
if (backedges.count(std::make_pair(SrcMBB, TgtMBB)) &&
@@ -44,7 +46,8 @@ void LazyLiveness::computeBackedgeChain(MachineFunction& mf,
computeBackedgeChain(mf, TgtMBB);
tv[MBB].set(preorder[TgtMBB]);
- tv[MBB] |= tv[TgtMBB];
+ SparseBitVector<128> right = tv[TgtMBB];
+ tv[MBB] |= right;
}
}
@@ -60,6 +63,12 @@ bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) {
backedge_target.clear();
calculated.clear();
preorder.clear();
+ rev_preorder.clear();
+
+ rv.resize(mf.size());
+ tv.resize(mf.size());
+ preorder.resize(mf.size());
+ rev_preorder.reserve(mf.size());
MRI = &mf.getRegInfo();
MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
@@ -106,8 +115,8 @@ bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) {
for (MachineBasicBlock::succ_iterator SI = (*POI)->succ_begin(),
SE = (*POI)->succ_end(); SI != SE; ++SI)
if (!backedges.count(std::make_pair(*POI, *SI)) && tv.count(*SI)) {
- SparseBitVector<128>& PBV = tv[*POI];
- PBV = tv[*SI];
+ SparseBitVector<128> right = tv[*SI];
+ tv[*POI] |= right;
}
for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()),
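
The new resize/reserve calls uphold the invariant that the added assert checks: side tables indexed by preorder number must be sized to the function before any lookup. A small sketch of the guarded lookup, with the table type simplified:

    #include <cassert>
    #include <vector>

    struct MachineBasicBlock;

    // Indexing a preorder-numbered table is only safe once the table has
    // been sized to the number of blocks; an out-of-range preorder number
    // means a stale table, which the assert reports instead of letting the
    // read run past the end.
    static MachineBasicBlock *
    lookupByPreorder(const std::vector<MachineBasicBlock*> &RevPreorder,
                     unsigned PreorderNum) {
      assert(RevPreorder.size() > PreorderNum && "Unknown block!");
      return RevPreorder[PreorderNum];
    }
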
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 67120b879886..cac92530df8f 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -305,9 +306,9 @@ void LiveInterval::removeRange(unsigned Start, unsigned End,
VNInfo *VNI = valnos.back();
valnos.pop_back();
VNI->~VNInfo();
- } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } while (!valnos.empty() && valnos.back()->isUnused());
} else {
- ValNo->def = ~1U;
+ ValNo->setIsUnused(true);
}
}
}
@@ -353,9 +354,9 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
VNInfo *VNI = valnos.back();
valnos.pop_back();
VNI->~VNInfo();
- } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } while (!valnos.empty() && valnos.back()->isUnused());
} else {
- ValNo->def = ~1U;
+ ValNo->setIsUnused(true);
}
}
@@ -371,9 +372,8 @@ void LiveInterval::scaleNumbering(unsigned factor) {
// Scale VNI info.
for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) {
VNInfo *vni = *VNI;
- if (vni->def != ~0U && vni->def != ~1U) {
- vni->def = InstrSlots::scale(vni->def, factor);
- }
+
+ vni->def = InstrSlots::scale(vni->def, factor);
for (unsigned i = 0; i < vni->kills.size(); ++i) {
if (vni->kills[i] != 0)
@@ -421,13 +421,13 @@ VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const {
return VNI;
}
-
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
const int *RHSValNoAssignments,
- SmallVector<VNInfo*, 16> &NewVNInfo) {
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI) {
// Determine if any of our live range values are mapped. This is uncommon, so
// we want to avoid the interval scan if not.
bool MustMapCurValNos = false;
@@ -502,8 +502,18 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
}
weight += Other.weight;
- if (Other.preference && !preference)
- preference = Other.preference;
+
+ // Update regalloc hint if currently there isn't one.
+ if (TargetRegisterInfo::isVirtualRegister(reg) &&
+ TargetRegisterInfo::isVirtualRegister(Other.reg)) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg);
+ if (Hint.first == 0 && Hint.second == 0) {
+ std::pair<unsigned, unsigned> OtherHint =
+ MRI->getRegAllocationHint(Other.reg);
+ if (OtherHint.first || OtherHint.second)
+ MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second);
+ }
+ }
}
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
@@ -582,9 +592,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
VNInfo *VNI = valnos.back();
valnos.pop_back();
VNI->~VNInfo();
- } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } while (!valnos.empty() && valnos.back()->isUnused());
} else {
- V1->def = ~1U;
+ V1->setIsUnused(true);
}
}
}
@@ -611,7 +621,7 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
else if (UnusedValNo)
ClobberValNo = UnusedValNo;
else {
- UnusedValNo = ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+ UnusedValNo = ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator);
ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
}
@@ -664,7 +674,7 @@ void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End,
BumpPtrAllocator &VNInfoAllocator) {
// Find a value # to use for the clobber ranges. If there is already a value#
// for unknown values, use it.
- VNInfo *ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+ VNInfo *ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator);
iterator IP = begin();
IP = std::upper_bound(IP, end(), Start);
@@ -747,24 +757,26 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
VNInfo *VNI = valnos.back();
valnos.pop_back();
VNI->~VNInfo();
- } while (valnos.back()->def == ~1U);
+ } while (valnos.back()->isUnused());
} else {
- V1->def = ~1U;
+ V1->setIsUnused(true);
}
return V2;
}
void LiveInterval::Copy(const LiveInterval &RHS,
+ MachineRegisterInfo *MRI,
BumpPtrAllocator &VNInfoAllocator) {
ranges.clear();
valnos.clear();
- preference = RHS.preference;
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
+ MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
+
weight = RHS.weight;
for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
const VNInfo *VNI = RHS.getValNumInfo(i);
- VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator);
- copyValNumInfo(NewVNI, VNI);
+ createValueCopy(VNI, VNInfoAllocator);
}
for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
const LiveRange &LR = RHS.ranges[i];
@@ -816,22 +828,22 @@ void LiveInterval::print(std::ostream &OS,
const VNInfo *vni = *i;
if (vnum) OS << " ";
OS << vnum << "@";
- if (vni->def == ~1U) {
+ if (vni->isUnused()) {
OS << "x";
} else {
- if (vni->def == ~0U)
+ if (!vni->isDefAccurate())
OS << "?";
else
OS << vni->def;
unsigned ee = vni->kills.size();
- if (ee || vni->hasPHIKill) {
+ if (ee || vni->hasPHIKill()) {
OS << "-(";
for (unsigned j = 0; j != ee; ++j) {
OS << vni->kills[j];
if (j != ee-1)
OS << " ";
}
- if (vni->hasPHIKill) {
+ if (vni->hasPHIKill()) {
if (ee)
OS << " ";
OS << "phi";
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index cf0a648b629d..d6931df896dc 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -199,7 +199,7 @@ void LiveIntervals::computeNumbering() {
// Remap the VNInfo def index, which works the same as the
// start indices above. VN's with special sentinel defs
// don't need to be remapped.
- if (vni->def != ~0U && vni->def != ~1U) {
+ if (vni->isDefAccurate() && !vni->isUnused()) {
unsigned index = vni->def / InstrSlots::NUM;
unsigned offset = vni->def % InstrSlots::NUM;
if (offset == InstrSlots::LOAD) {
@@ -447,7 +447,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = mi;
// Earlyclobbers move back one.
- ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
@@ -539,13 +539,15 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// The new value number (#1) is defined by the instruction we claimed
// defined value #0.
VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy,
+ false, // update at *
VNInfoAllocator);
-
+ ValNo->setFlags(OldValNo->getFlags()); // * <- updating here
+
// Value#0 is now defined by the 2-addr instruction.
OldValNo->def = RedefIndex;
OldValNo->copy = 0;
if (MO.isEarlyClobber())
- OldValNo->redefByEC = true;
+ OldValNo->setHasRedefByEC(true);
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -577,12 +579,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DOUT << " Removing [" << Start << "," << End << "] from: ";
interval.print(DOUT, tri_); DOUT << "\n";
interval.removeRange(Start, End);
- VNI->hasPHIKill = true;
+ VNI->setHasPHIKill(true);
DOUT << " RESULT: "; interval.print(DOUT, tri_);
// Replace the interval with one of a NEW value number. Note that this
// value number isn't actually defined by an instruction, weird huh? :)
- LiveRange LR(Start, End, interval.getNextValue(~0, 0, VNInfoAllocator));
+ LiveRange LR(Start, End,
+ interval.getNextValue(mbb->getNumber(), 0, false, VNInfoAllocator));
+ LR.valno->setIsPHIDef(true);
DOUT << " replace range with " << LR;
interval.addRange(LR);
interval.addKill(LR.valno, End);
@@ -604,13 +608,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = mi;
- ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
unsigned killIndex = getMBBEndIdx(mbb) + 1;
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
interval.addKill(ValNo, killIndex);
- ValNo->hasPHIKill = true;
+ ValNo->setHasPHIKill(true);
DOUT << " +" << LR;
}
}
@@ -692,9 +696,9 @@ exit:
LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
bool Extend = OldLR != interval.end();
VNInfo *ValNo = Extend
- ? OldLR->valno : interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator);
if (MO.isEarlyClobber() && Extend)
- ValNo->redefByEC = true;
+ ValNo->setHasRedefByEC(true);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
interval.addKill(LR.valno, end);
@@ -750,7 +754,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
DOUT << " killed";
end = getUseIndex(baseIndex) + 1;
SeenDefUse = true;
- goto exit;
+ break;
} else if (mi->modifiesRegister(interval.reg, tri_)) {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that defines
@@ -759,7 +763,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
DOUT << " dead";
end = getDefIndex(start) + 1;
SeenDefUse = true;
- goto exit;
+ break;
}
baseIndex += InstrSlots::NUM;
@@ -771,7 +775,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
}
}
-exit:
// Live-in register might not be used at all.
if (!SeenDefUse) {
if (isAlias) {
@@ -783,7 +786,11 @@ exit:
}
}
- LiveRange LR(start, end, interval.getNextValue(~0U, 0, VNInfoAllocator));
+ VNInfo *vni =
+ interval.getNextValue(MBB->getNumber(), 0, false, VNInfoAllocator);
+ vni->setIsPHIDef(true);
+ LiveRange LR(start, end, vni);
+
interval.addRange(LR);
interval.addKill(LR.valno, end);
DOUT << " +" << LR << '\n';
@@ -896,7 +903,7 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
/// managing the allocated memory.
LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
LiveInterval *NewLI = createInterval(li->reg);
- NewLI->Copy(*li, getVNInfoAllocator());
+ NewLI->Copy(*li, mri_, getVNInfoAllocator());
return NewLI;
}
@@ -1099,13 +1106,12 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
i != e; ++i) {
const VNInfo *VNI = *i;
- unsigned DefIdx = VNI->def;
- if (DefIdx == ~1U)
+ if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- if (DefIdx == ~0u)
+ if (!VNI->isDefAccurate())
return false;
- MachineInstr *ReMatDefMI = getInstructionFromIndex(DefIdx);
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
bool DefIsLoad = false;
if (!ReMatDefMI ||
!isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
@@ -1450,7 +1456,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (HasUse) {
if (CreatedNewVReg) {
LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
- nI.getNextValue(~0U, 0, VNInfoAllocator));
+ nI.getNextValue(0, 0, false, VNInfoAllocator));
DOUT << " +" << LR;
nI.addRange(LR);
} else {
@@ -1464,7 +1470,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
}
if (HasDef) {
LiveRange LR(getDefIndex(index), getStoreIndex(index),
- nI.getNextValue(~0U, 0, VNInfoAllocator));
+ nI.getNextValue(0, 0, false, VNInfoAllocator));
DOUT << " +" << LR;
nI.addRange(LR);
}
@@ -1840,14 +1846,14 @@ addIntervalsForSpillsFast(const LiveInterval &li,
unsigned index = getInstructionIndex(MI);
if (HasUse) {
LiveRange LR(getLoadIndex(index), getUseIndex(index),
- nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+ nI.getNextValue(0, 0, false, getVNInfoAllocator()));
DOUT << " +" << LR;
nI.addRange(LR);
vrm.addRestorePoint(NewVReg, MI);
}
if (HasDef) {
LiveRange LR(getDefIndex(index), getStoreIndex(index),
- nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+ nI.getNextValue(0, 0, false, getVNInfoAllocator()));
DOUT << " +" << LR;
nI.addRange(LR);
vrm.addSpillPoint(NewVReg, true, MI);
@@ -1961,12 +1967,11 @@ addIntervalsForSpills(const LiveInterval &li,
i != e; ++i) {
const VNInfo *VNI = *i;
unsigned VN = VNI->id;
- unsigned DefIdx = VNI->def;
- if (DefIdx == ~1U)
+ if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = (DefIdx == ~0u)
- ? 0 : getInstructionFromIndex(DefIdx);
+ MachineInstr *ReMatDefMI = VNI->isDefAccurate()
+ ? getInstructionFromIndex(VNI->def) : 0;
bool dummy;
if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
// Remember how to remat the def of this val#.
@@ -1977,7 +1982,7 @@ addIntervalsForSpills(const LiveInterval &li,
ReMatDefs[VN] = Clone;
bool CanDelete = true;
- if (VNI->hasPHIKill) {
+ if (VNI->hasPHIKill()) {
// A kill is a phi node; not all of its uses can be rematerialized.
// It must not be deleted.
CanDelete = false;
@@ -2287,8 +2292,8 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
LiveInterval& Interval = getOrCreateInterval(reg);
VNInfo* VN = Interval.getNextValue(
getInstructionIndex(startInst) + InstrSlots::DEF,
- startInst, getVNInfoAllocator());
- VN->hasPHIKill = true;
+ startInst, true, getVNInfoAllocator());
+ VN->setHasPHIKill(true);
VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
getMBBEndIdx(startInst->getParent()) + 1, VN);
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 622882169128..bd845085bbf5 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -359,12 +359,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// That is, unless we are currently processing the last reference itself.
LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
- /* Partial uses. Mark register def dead and add implicit def of
- sub-registers which are used.
- FIXME: LiveIntervalAnalysis can't handle this yet!
- EAX<dead> = op AL<imp-def>
- That is, EAX def is dead but AL def extends pass it.
- Enable this after live interval analysis is fixed to improve codegen!
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, EAX def is dead but AL def extends past it.
+ // Enable this after live interval analysis is fixed to improve codegen!
else if (!PhysRegUse[Reg]) {
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
@@ -377,7 +376,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
PartUses.erase(*SS);
}
}
- } */
+ }
else
LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
return true;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 4f5ab1f5860e..544d83a33f7f 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -16,6 +16,7 @@ using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
UsedPhysRegs.resize(TRI.getNumRegs());
@@ -64,6 +65,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
// Add a reg, but keep track of whether the vector reallocated or not.
void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
+ RegAllocHints.push_back(std::make_pair(0, 0));
if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
// The vector reallocated, handle this now.
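
RegAllocHints is kept in lockstep with the vreg table: every createVirtualRegister pushes a default (0, 0) entry, so a hint is a dense array lookup rather than a map probe. A sketch of that bookkeeping in isolation; the meaning of the pair (a hint type plus a preferred register) is inferred from how the allocators below consume it:

    #include <utility>
    #include <vector>

    // Per-virtual-register allocation hints, stored densely and in
    // lockstep with the main vreg table; the vector index is the vreg
    // number.
    class RegHintTable {
      std::vector<std::pair<unsigned, unsigned> > Hints; // (type, reg)
    public:
      unsigned createVirtualRegister() {
        Hints.push_back(std::make_pair(0u, 0u)); // default: no hint
        return unsigned(Hints.size() - 1);
      }
      void setHint(unsigned VReg, unsigned Type, unsigned PrefReg) {
        Hints[VReg] = std::make_pair(Type, PrefReg);
      }
      std::pair<unsigned, unsigned> getHint(unsigned VReg) const {
        return Hints[VReg];
      }
    };
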
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 97d4728348e5..ae60c86c3d7c 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -343,7 +343,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
return SS;
}
@@ -637,8 +637,9 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
if (Phis.count(MBB)) return Phis[MBB];
unsigned StartIndex = LIs->getMBBStartIdx(MBB);
- VNInfo *RetVNI = Phis[MBB] = LI->getNextValue(~0U, /*FIXME*/ 0,
- LIs->getVNInfoAllocator());
+ VNInfo *RetVNI = Phis[MBB] =
+ LI->getNextValue(0, /*FIXME*/ 0, false, LIs->getVNInfoAllocator());
+
if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
// If there are no uses or defs between our starting point and the
@@ -654,7 +655,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
IncomingVNs[*PI] = Incoming;
}
- if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill) {
+ if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) {
VNInfo* OldVN = RetVNI;
VNInfo* NewVN = IncomingVNs.begin()->second;
VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
@@ -678,7 +679,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
// VNInfo to represent the joined value.
for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
- I->second->hasPHIKill = true;
+ I->second->setHasPHIKill(true);
unsigned KillIndex = LIs->getMBBEndIdx(I->first);
if (!LiveInterval::isKill(I->second, KillIndex))
LI->addKill(I->second, KillIndex);
@@ -730,7 +731,9 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
unsigned DefIdx = LIs->getInstructionIndex(&*DI);
DefIdx = LiveIntervals::getDefIndex(DefIdx);
- VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
+ assert(DI->getOpcode() != TargetInstrInfo::PHI &&
+ "Following NewVN isPHIDef flag incorrect. Fix me!");
+ VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
// If the def is a move, set the copy field.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
@@ -793,7 +796,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
// Bail out if we ever encounter a valno that has a PHI kill. We can't
// renumber these.
- if (OldVN->hasPHIKill) return;
+ if (OldVN->hasPHIKill()) return;
VNsToCopy.push_back(OldVN);
@@ -823,9 +826,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
VNInfo* OldVN = *OI;
// Copy the valno over
- VNInfo* NewVN = NewLI.getNextValue(OldVN->def, OldVN->copy,
- LIs->getVNInfoAllocator());
- NewLI.copyValNumInfo(NewVN, OldVN);
+ VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator());
NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
// Remove the valno from the old interval
@@ -873,7 +874,7 @@ bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
unsigned KillIdx = 0;
- if (ValNo->def == ~0U || DefMI->getParent() == BarrierMBB)
+ if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
else
KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
@@ -942,7 +943,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
}
return FMI;
@@ -1032,13 +1033,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
VNInfo *ValNo = LR->valno;
- if (ValNo->def == ~1U) {
+ if (ValNo->isUnused()) {
// Defined by a dead def? How can this be?
assert(0 && "Val# is defined by a dead def?");
abort();
}
- MachineInstr *DefMI = (ValNo->def != ~0U)
+ MachineInstr *DefMI = ValNo->isDefAccurate()
? LIs->getInstructionFromIndex(ValNo->def) : NULL;
// If this would create a new join point, do not split.
@@ -1072,8 +1073,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
unsigned SpillIndex = 0;
MachineInstr *SpillMI = NULL;
int SS = -1;
- if (ValNo->def == ~0U) {
- // If it's defined by a phi, we must split just before the barrier.
+ if (!ValNo->isDefAccurate()) {
+ // If we don't know where the def is, we must split just before the barrier.
if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
BarrierMBB, SS, RefsInMBB))) {
SpillIndex = LIs->getInstructionIndex(SpillMI);
@@ -1254,17 +1255,16 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// We don't currently try to handle definitions with PHI kills, because
// it would involve processing more than one VNInfo at once.
- if (CurrVN->hasPHIKill) continue;
+ if (CurrVN->hasPHIKill()) continue;
// We also don't try to handle the results of PHI joins, since there's
// no defining instruction to analyze.
- unsigned DefIdx = CurrVN->def;
- if (DefIdx == ~0U || DefIdx == ~1U) continue;
+ if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;
// We're only interested in eliminating cruft introduced by the splitter,
// which is of the form load-use or load-use-store. First, check that the
// definition is a load, and remember what stack slot we loaded it from.
- MachineInstr* DefMI = LIs->getInstructionFromIndex(DefIdx);
+ MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
int FrameIndex;
if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
@@ -1383,7 +1383,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
if (DefMBB == BarrierMBB)
return false;
- if (LR->valno->hasPHIKill)
+ if (LR->valno->hasPHIKill())
return false;
unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 804fae55e545..41a42fd22d3b 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -281,7 +281,8 @@ namespace {
/// getFreePhysReg - return a free physical register for this virtual
/// register interval if we have one, otherwise return 0.
unsigned getFreePhysReg(LiveInterval* cur);
- unsigned getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
unsigned MaxInactiveCount,
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs);
@@ -352,11 +353,12 @@ void RALinScan::ComputeRelatedRegClasses() {
/// different register classes or because the coalescer was overly
/// conservative.
unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
- if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue())
+ unsigned Preference = vrm_->getRegAllocPref(cur.reg);
+ if ((Preference && Preference == Reg) || !cur.containsOneValue())
return Reg;
VNInfo *vni = cur.begin()->valno;
- if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ if (!vni->def || vni->isUnused() || !vni->isDefAccurate())
return Reg;
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg;
@@ -584,7 +586,7 @@ void RALinScan::linearScan()
// register allocator had to spill other registers in its register class.
if (ls_->getNumIntervals() == 0)
return;
- if (!vrm_->FindUnusedRegisters(tri_, li_))
+ if (!vrm_->FindUnusedRegisters(li_))
return;
}
@@ -743,7 +745,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
if (SI.hasAtLeastOneValue())
VNI = SI.getValNumInfo(0);
else
- VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator());
+ VNI = SI.getNextValue(0, 0, false, ls_->getVNInfoAllocator());
LiveInterval &RI = li_->getInterval(cur->reg);
// FIXME: This may be overly conservative.
@@ -897,7 +899,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// This is an implicitly defined live interval, just assign any register.
const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
if (cur->empty()) {
- unsigned physReg = cur->preference;
+ unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
physReg = *RC->allocation_order_begin(*mf_);
DOUT << tri_->getName(physReg) << '\n';
@@ -917,9 +919,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// register class, then we should try to assign it the same register.
// This can happen when the move is from a larger register class to a smaller
// one, e.g. X86::mov32to32_. These move instructions are not coalescable.
- if (!cur->preference && cur->hasAtLeastOneValue()) {
+ if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
VNInfo *vni = cur->begin()->valno;
- if (vni->def && vni->def != ~1U && vni->def != ~0U) {
+ if (vni->def && !vni->isUnused() && vni->isDefAccurate()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (CopyMI &&
@@ -935,7 +937,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
if (DstSubReg)
Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- cur->preference = Reg;
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
}
}
}
@@ -1044,7 +1046,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
// "Downgrade" physReg to try to keep physReg from being allocated until
// the next reload from the same SS is allocated.
- NextReloadLI->preference = physReg;
+ mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
DowngradeRegister(cur, physReg);
}
return;
@@ -1071,7 +1073,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// Find a register to spill.
float minWeight = HUGE_VALF;
- unsigned minReg = 0; /*cur->preference*/; // Try the pref register first.
+ unsigned minReg = 0;
bool Found = false;
std::vector<std::pair<unsigned,float> > RegsWeights;
@@ -1290,7 +1292,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// If the interval has a preference, it must be defined by a copy. Clear the
// preference now since the source interval allocation may have been
// undone as well.
- i->preference = 0;
+ mri_->setRegAllocationHint(i->reg, 0, 0);
else {
UpgradeRegister(ii->second);
}
@@ -1346,15 +1348,23 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
}
}
-unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
unsigned MaxInactiveCount,
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs) {
unsigned FreeReg = 0;
unsigned FreeRegInactiveCount = 0;
- TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
- TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+ // Resolve second part of the hint (if possible) given the current allocation.
+ unsigned physReg = Hint.second;
+ if (physReg &&
+ TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ physReg = vrm_->getPhys(physReg);
+
+ TargetRegisterClass::iterator I, E;
+ tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
assert(I != E && "No allocatable register in this register class!");
// Scan for the first available register.
@@ -1377,7 +1387,7 @@ unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
// return this register.
if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
return FreeReg;
-
+
// Continue scanning the registers, looking for the one with the highest
// inactive count. Alkis found that this reduced register pressure very
// slightly on X86 (in rev 1.94 of this file), though this should probably be
@@ -1428,20 +1438,21 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
// If copy coalescer has assigned a "preferred" register, check if it's
// available first.
- if (cur->preference) {
- DOUT << "(preferred: " << tri_->getName(cur->preference) << ") ";
- if (isRegAvail(cur->preference) &&
- RC->contains(cur->preference))
- return cur->preference;
+ unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+ if (Preference) {
+ DOUT << "(preferred: " << tri_->getName(Preference) << ") ";
+ if (isRegAvail(Preference) &&
+ RC->contains(Preference))
+ return Preference;
}
if (!DowngradedRegs.empty()) {
- unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts,
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
true);
if (FreeReg)
return FreeReg;
}
- return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false);
+ return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
}
FunctionPass* llvm::createLinearScanRegisterAllocator() {
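
The key new step in getFreePhysReg is resolving the second half of the hint before computing the allocation order: a hint that names a virtual register is only useful once that register has a physical home. A simplified sketch of the resolution, with the vreg test and the assignment map as stand-ins for the real VirtRegMap (unassigned hints are simply dropped here):

    #include <map>

    static const unsigned FirstVirtualReg = 1024; // assumed boundary

    static bool isVirtual(unsigned Reg) { return Reg >= FirstVirtualReg; }

    // Translate a register hint through the current allocation: if the
    // hint names a virtual register that already has a physical
    // assignment, use that physical register; otherwise drop the hint.
    static unsigned
    resolveHint(unsigned HintReg,
                const std::map<unsigned, unsigned> &VRegToPhys) {
      if (HintReg && isVirtual(HintReg)) {
        std::map<unsigned, unsigned>::const_iterator I =
            VRegToPhys.find(HintReg);
        return I == VRegToPhys.end() ? 0 : I->second;
      }
      return HintReg; // already physical, or no hint at all
    }
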
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 61450a7cca7c..89e2c59fe805 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -651,7 +651,7 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
if (stackInterval.getNumValNums() != 0)
vni = stackInterval.getValNumInfo(0);
else
- vni = stackInterval.getNextValue(-0U, 0, lss->getVNInfoAllocator());
+ vni = stackInterval.getNextValue(0, 0, false, lss->getVNInfoAllocator());
LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
stackInterval.MergeRangesInAsValue(rhsInterval, vni);
@@ -733,8 +733,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
itr != end; ++itr) {
LiveInterval *li = *itr;
- unsigned physReg = li->preference;
-
+ unsigned physReg = vrm->getRegAllocPref(li->reg);
if (physReg == 0) {
const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
physReg = *liRC->allocation_order_begin(*mf);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4a7dbebe2de8..24fccf00eec4 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -47,7 +47,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/DebugLoc.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
@@ -361,7 +360,7 @@ bool FastISel::SelectCall(User *I) {
// Returned ID is 0 if this is unbalanced "end of inlined
// scope". This could happen if optimizer eats dbg intrinsics
// or "beginning of inlined scope" is not recoginized due to
- // missing location info. In such cases, do ignore this region.end.
+ // missing location info. In such cases, ignore this region.end.
BuildMI(MBB, DL, II).addImm(ID);
} else {
const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f3c2833e0fe3..1bb80901ded0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2768,6 +2768,53 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
ISD::SETULT : ISD::SETUGT));
break;
}
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ MVT VT = Node->getValueType(0);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(MVT::getIntegerVT(VT.getSizeInBits() * 2))) {
+ MVT WideVT = MVT::getIntegerVT(VT.getSizeInBits() * 2);
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // FIXME: We should be able to fall back to a libcall with an illegal
+ // type in some cases.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ assert(0 && "Don't know how to expand this operation yet!");
+ }
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
case ISD::BUILD_PAIR: {
MVT PairTy = Node->getValueType(0);
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
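
The expansion above detects overflow from the top half of a double-width product: nonzero for an unsigned multiply, and different from the bottom half's sign-extension for a signed one. The same checks worked through on 32-bit operands with a 64-bit multiply:

    #include <stdint.h>

    // Unsigned multiply-with-overflow: the 32-bit product overflows
    // exactly when the high 32 bits of the full 64-bit product are
    // nonzero.
    static bool umulo32(uint32_t A, uint32_t B, uint32_t &Bottom) {
      uint64_t Wide = (uint64_t)A * B;
      Bottom = (uint32_t)Wide;
      return (uint32_t)(Wide >> 32) != 0;
    }

    // Signed multiply-with-overflow: the product fits exactly when the
    // high 32 bits equal the sign-extension of the low 32 bits, i.e. the
    // low half arithmetically shifted right by 31 (the SRA node in the
    // expansion above).
    static bool smulo32(int32_t A, int32_t B, int32_t &Bottom) {
      int64_t Wide = (int64_t)A * B;
      Bottom = (int32_t)Wide;
      return (int32_t)(Wide >> 32) != (Bottom >> 31);
    }
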
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b7d7818d6e02..6e5adee84c34 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -95,14 +95,13 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
if (InVT.isVector() && OutVT.isInteger()) {
// Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
// is legal but the result is not.
- MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OutVT), 2);
+ MVT NVT = MVT::getVectorVT(NOutVT, 2);
if (isTypeLegal(NVT)) {
SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
- MVT EltNVT = NVT.getVectorElementType();
- Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltNVT, CastInOp,
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltNVT, CastInOp,
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
DAG.getIntPtrConstant(1));
if (TLI.isBigEndian())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index 93750d6b98c4..48ebd0ff0d4f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -5317,8 +5317,12 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
if ((OpFlag & 7) == 2 /*REGDEF*/
|| (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
- assert(!OpInfo.isIndirect &&
- "Don't know how to handle tied indirect register inputs yet!");
+ if (OpInfo.isIndirect) {
+ cerr << "llvm: error: "
+ "Don't know how to handle tied indirect "
+ "register inputs yet!\n";
+ exit(1);
+ }
RegsForValue MatchedRegs;
MatchedRegs.TLI = &TLI;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ab4cd515531c..a771d4695a52 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -171,6 +171,8 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16";
Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
@@ -183,6 +185,8 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16";
Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
@@ -271,6 +275,10 @@ RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
if (RetVT == MVT::i32)
return FPTOSINT_F32_I32;
if (RetVT == MVT::i64)
@@ -306,6 +314,10 @@ RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) {
if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
if (RetVT == MVT::i32)
return FPTOUINT_F32_I32;
if (RetVT == MVT::i64)
@@ -2584,8 +2596,12 @@ bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
// Check that operand of the RET node sources from the CALL node. The RET node
// has at least two operands. Operand 0 holds the chain. Operand 1 holds the
// value.
+ // Also, we need to check that there is no code in between the call and the
+ // return. Hence we also check that the incoming chain to the return sources
+ // from the outgoing chain of the call.
if (NumOps > 1 &&
- IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0))
+ IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) &&
+ Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
return true;
// void return: The RET node has the chain result value of the CALL node as
// input.
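
The table additions extend the float-to-int conversion libcalls down to i8 and i16; the getter is a plain width dispatch. A sketch of the f32-to-signed row only, using the routine names added in this hunk:

    // Map an f32 -> signed integer conversion to its runtime library
    // routine; returns 0 when no call of that width is available.
    static const char *getFPToSIntLibcallForF32(unsigned RetBits) {
      switch (RetBits) {
      case 8:   return "__fixsfi8";
      case 16:  return "__fixsfi16";
      case 32:  return "__fixsfsi";
      case 64:  return "__fixsfdi";
      case 128: return "__fixsfti";
      default:  return 0;
      }
    }
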
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 2bc234f7d09e..20348055bd96 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -141,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// The live interval of ECX is represented as this:
// %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
// The coalescer has no idea there was a def in the middle of [174,230].
- if (AValNo->redefByEC)
+ if (AValNo->hasRedefByEC())
return false;
// If AValNo is defined as a copy from IntB, we can potentially process this.
@@ -203,7 +203,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
LiveInterval &SRLI = li_->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart, 0, li_->getVNInfoAllocator())));
+ SRLI.getNextValue(FillerStart, 0, true,
+ li_->getVNInfoAllocator())));
}
}
@@ -304,8 +305,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
assert(ALR != IntA.end() && "Live range not found!");
VNInfo *AValNo = ALR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (AValNo->def == ~0U || AValNo->def == ~1U || AValNo->hasPHIKill)
+ // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
+ // tested?
+ if (AValNo->isPHIDef() || !AValNo->isDefAccurate() ||
+ AValNo->isUnused() || AValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
const TargetInstrDesc &TID = DefMI->getDesc();
@@ -351,7 +354,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
NewMI->getOperand(OpIdx).setIsKill();
- bool BHasPHIKill = BValNo->hasPHIKill;
+ bool BHasPHIKill = BValNo->hasPHIKill();
SmallVector<VNInfo*, 4> BDeadValNos;
SmallVector<unsigned, 4> BKills;
std::map<unsigned, unsigned> BExtend;
@@ -403,7 +406,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// extended to the end of the existing live range defined by the copy.
unsigned DefIdx = li_->getDefIndex(UseIdx);
const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
- BHasPHIKill |= DLR->valno->hasPHIKill;
+ BHasPHIKill |= DLR->valno->hasPHIKill();
assert(DLR->valno->def == DefIdx);
BDeadValNos.push_back(DLR->valno);
BExtend[DLR->start] = DLR->end;
@@ -462,7 +465,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
}
}
IntB.addKills(ValNo, BKills);
- ValNo->hasPHIKill = BHasPHIKill;
+ ValNo->setHasPHIKill(BHasPHIKill);
DOUT << " result = "; IntB.print(DOUT, tri_);
DOUT << "\n";
@@ -578,8 +581,10 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (ValNo->def == ~0U || ValNo->def == ~1U || ValNo->hasPHIKill)
+ // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
+ // tested?
+ if (ValNo->isPHIDef() || !ValNo->isDefAccurate() ||
+ ValNo->isUnused() || ValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
const TargetInstrDesc &TID = DefMI->getDesc();
@@ -616,19 +621,17 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
- CopyMI->removeFromParent();
tii_->reMaterialize(*MBB, MII, DstReg, DefMI);
MachineInstr *NewMI = prior(MII);
if (checkForDeadDef) {
- // PR4090 fix: Trim interval failed because there was no use of the
- // source interval in this MBB. If the def is in this MBB too then we
- // should mark it dead:
- if (DefMI->getParent() == MBB) {
- DefMI->addRegisterDead(SrcInt.reg, tri_);
- SrcLR->end = SrcLR->start + 1;
- }
-
+ // PR4090 fix: Trim interval failed because there was no use of the
+ // source interval in this MBB. If the def is in this MBB too then we
+ // should mark it dead:
+ if (DefMI->getParent() == MBB) {
+ DefMI->addRegisterDead(SrcInt.reg, tri_);
+ SrcLR->end = SrcLR->start + 1;
+ }
}
// CopyMI may have implicit operands, transfer them over to the newly
@@ -647,7 +650,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
- MBB->getParent()->DeleteMachineInstr(CopyMI);
+ CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
++NumReMats;
@@ -673,7 +676,7 @@ bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI,
return false;
unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1;
if (DstLR->valno->kills.size() == 1 &&
- DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill)
+ DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill())
return true;
return false;
}
@@ -937,7 +940,7 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx);
if (LR == li.end())
return false;
- if (LR->valno->hasPHIKill)
+ if (LR->valno->hasPHIKill())
return false;
if (LR->valno->def != CopyIdx)
return false;
@@ -965,11 +968,11 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
}
-/// RemoveCopiesFromValNo - The specified value# is defined by an implicit
-/// def and it is being removed. Turn all copies from this value# into
-/// identity copies so they will be removed.
-void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
- VNInfo *VNI) {
+/// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
+/// implicit_def and it is being removed. Turn all copies from this value#
+/// into implicit_defs.
+void SimpleRegisterCoalescing::TurnCopiesFromValNoToImpDefs(LiveInterval &li,
+ VNInfo *VNI) {
SmallVector<MachineInstr*, 4> ImpDefs;
MachineOperand *LastUse = NULL;
unsigned LastUseIdx = li_->getUseIndex(VNI->def);
@@ -979,9 +982,8 @@ void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
MachineInstr *MI = &*RI;
++RI;
if (MO->isDef()) {
- if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
ImpDefs.push_back(MI);
- }
continue;
}
if (JoinedCopies.count(MI))
@@ -994,13 +996,18 @@ void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
SrcReg == li.reg) {
- // Each use MI may have multiple uses of this register. Change them all.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == li.reg)
- MO.setReg(DstReg);
- }
- JoinedCopies.insert(MI);
+ // Change it to an implicit_def.
+ MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+ for (int i = MI->getNumOperands() - 1, e = 0; i > e; --i)
+ MI->RemoveOperand(i);
+ // It's no longer a copy, update the valno it defines.
+ unsigned DefIdx = li_->getDefIndex(UseIdx);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(DefIdx);
+ assert(DLR != DstInt.end() && "Live range not found!");
+ assert(DLR->valno->copy == MI);
+ DLR->valno->copy = NULL;
+ ReMatCopies.insert(MI);
} else if (UseIdx > LastUseIdx) {
LastUseIdx = UseIdx;
LastUse = MO;
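The rewrite-in-place above is the standard trick for demoting an instruction to IMPLICIT_DEF: swap the descriptor, then strip every operand except the def. A sketch under the same assumptions (MI and tii_ are the surrounding MachineInstr and TargetInstrInfo):

    MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
    // Walk backwards so operand indices stay valid while erasing;
    // operand 0 (the def) is kept.
    for (int i = MI->getNumOperands() - 1; i > 0; --i)
      MI->RemoveOperand(i);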
@@ -1265,6 +1272,17 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
return true;
}
+/// getRegAllocPreference - Return the register allocation preference register.
+///
+static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF,
+ MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF);
+}
+
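This helper is the bridge from the old LiveInterval::preference field to the new hint pair kept in MachineRegisterInfo: a hint is a (Type, Reg) pair, with Type 0 meaning a plain preferred register and other values left to the target to interpret. A hedged usage sketch with the API this patch introduces:

    // Record a plain preference for a virtual register ...
    mri_->setRegAllocationHint(VReg, /*Type=*/0, PreferredPhysReg);
    // ... and resolve it later; ResolveRegAllocHint returns 0 if the
    // hint cannot be honored.
    std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(VReg);
    unsigned PhysReg = tri_->ResolveRegAllocHint(Hint.first, Hint.second, *mf_);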
/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
@@ -1566,7 +1584,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (PhysJoinTweak) {
if (SrcIsPhys) {
if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
- DstInt.preference = SrcReg;
+ mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg);
++numAborts;
DOUT << "\tMay tie down a physical register, abort!\n";
Again = true; // May be possible to coalesce later.
@@ -1574,7 +1592,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
} else {
if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
- SrcInt.preference = DstReg;
+ mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg);
++numAborts;
DOUT << "\tMay tie down a physical register, abort!\n";
Again = true; // May be possible to coalesce later.
@@ -1598,7 +1616,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (Length > Threshold &&
(((float)std::distance(mri_->use_begin(JoinVReg),
mri_->use_end()) / Length) < Ratio)) {
- JoinVInt.preference = JoinPReg;
+ mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
++numAborts;
DOUT << "\tMay tie down a physical register, abort!\n";
Again = true; // May be possible to coalesce later.
@@ -1669,9 +1687,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
E = SavedLI->vni_end(); I != E; ++I) {
const VNInfo *ValNo = *I;
VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy,
+ false, // updated at *
li_->getVNInfoAllocator());
- NewValNo->hasPHIKill = ValNo->hasPHIKill;
- NewValNo->redefByEC = ValNo->redefByEC;
+ NewValNo->setFlags(ValNo->getFlags()); // * updated here.
RealInt.addKills(NewValNo, ValNo->kills);
RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
}
@@ -1691,7 +1709,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
!SrcIsPhys && !DstIsPhys) {
if ((isExtSubReg && !Swapped) ||
((isInsSubReg || isSubRegToReg) && Swapped)) {
- ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
+ ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator());
std::swap(SrcReg, DstReg);
std::swap(ResSrcInt, ResDstInt);
}
@@ -1710,7 +1728,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(),
e = ResSrcInt->vni_end(); i != e; ++i) {
const VNInfo *vni = *i;
- if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
+ if (!vni->def || vni->isUnused() || vni->isPHIDef() || !vni->isDefAccurate())
continue;
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx;
@@ -1747,6 +1766,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// being merged.
li_->removeInterval(SrcReg);
+ // Update regalloc hint.
+ tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_);
+
// Manually delete the live interval copy.
if (SavedLI) {
SavedLI->clear();
@@ -1762,7 +1784,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
VNInfo *ImpVal = LR->valno;
assert(ImpVal->def == CopyIdx);
unsigned NextDef = LR->end;
- RemoveCopiesFromValNo(*ResDstInt, ImpVal);
+ TurnCopiesFromValNoToImpDefs(*ResDstInt, ImpVal);
ResDstInt->removeValNo(ImpVal);
LR = ResDstInt->FindLiveRangeContaining(NextDef);
if (LR != ResDstInt->end() && LR->valno->def == NextDef) {
@@ -1778,11 +1800,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// If the resulting interval has a preference that no longer fits because of subreg
// coalescing, just clear the preference.
- if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
+ unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_);
+ if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
- if (!RC->contains(ResDstInt->preference))
- ResDstInt->preference = 0;
+ if (!RC->contains(Preference))
+ mri_->setRegAllocationHint(ResDstInt->reg, 0, 0);
}
DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_);
@@ -1856,7 +1879,8 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
if (SrcReg == Reg)
return true;
- if (LR->valno->def == ~0U &&
+ // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
+ if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) &&
TargetRegisterInfo::isPhysicalRegister(li.reg) &&
*tri_->getSuperRegisters(li.reg)) {
// It's a sub-register live interval, we may not have precise information.
@@ -2025,12 +2049,20 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
// Okay, the final step is to loop over the RHS live intervals, adding them to
// the LHS.
- LHSValNo->hasPHIKill |= VNI->hasPHIKill;
+ if (VNI->hasPHIKill())
+ LHSValNo->setHasPHIKill(true);
LHS.addKills(LHSValNo, VNI->kills);
LHS.MergeRangesInAsValue(RHS, LHSValNo);
LHS.weight += RHS.weight;
- if (RHS.preference && !LHS.preference)
- LHS.preference = RHS.preference;
+
+ // Update regalloc hint if both are virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(LHS.reg) &&
+ TargetRegisterInfo::isVirtualRegister(RHS.reg)) {
+ std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg);
+ std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg);
+ if (RHSPref != LHSPref)
+ mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second);
+ }
// Update the liveintervals of sub-registers.
if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
@@ -2185,7 +2217,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy?
continue;
// DstReg is known to be a register in the LHS interval. If the src is
@@ -2202,7 +2234,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy?
continue;
// DstReg is known to be a register in the RHS interval. If the src is
@@ -2222,7 +2254,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
i != e; ++i) {
VNInfo *VNI = *i;
unsigned VN = VNI->id;
- if (LHSValNoAssignments[VN] >= 0 || VNI->def == ~1U)
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
continue;
ComputeUltimateVN(VNI, NewVNInfo,
LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
@@ -2232,7 +2264,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
i != e; ++i) {
VNInfo *VNI = *i;
unsigned VN = VNI->id;
- if (RHSValNoAssignments[VN] >= 0 || VNI->def == ~1U)
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
continue;
// If this value number isn't a copy from the LHS, it's a new number.
if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
@@ -2296,7 +2328,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
VNInfo *VNI = I->first;
unsigned LHSValID = LHSValNoAssignments[VNI->id];
LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
- NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill;
+ if (VNI->hasPHIKill())
+ NewVNInfo[LHSValID]->setHasPHIKill(true);
RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
}
@@ -2306,7 +2339,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
VNInfo *VNI = I->first;
unsigned RHSValID = RHSValNoAssignments[VNI->id];
LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
- NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill;
+ if (VNI->hasPHIKill())
+ NewVNInfo[RHSValID]->setHasPHIKill(true);
LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
}
@@ -2315,10 +2349,12 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
if ((RHS.ranges.size() > LHS.ranges.size() &&
TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
- RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo);
+ RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo,
+ mri_);
Swapped = true;
} else {
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo);
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);
Swapped = false;
}
return true;
@@ -2620,6 +2656,11 @@ SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
return false;
LiveInterval &DstInt = li_->getInterval(DstReg);
const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx);
+ // If the valno extends beyond this basic block, then it's not safe to delete
+ // the val# or else live-in information won't be correct.
+ MachineBasicBlock *EndMBB = li_->getMBBFromIndex(DstLR->end);
+ if (EndMBB != MBB)
+ return false;
DstInt.removeValNo(DstLR->valno);
CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i)
@@ -2800,7 +2841,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
}
// Slightly prefer live interval that has been assigned a preferred reg.
- if (LI.preference)
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
+ if (Hint.first || Hint.second)
LI.weight *= 1.01F;
// Divide the weight of the interval by its size. This encourages
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index a495bfd644a5..d2c55810f60c 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -219,10 +219,10 @@ namespace llvm {
bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
LiveInterval &li, LiveInterval &ImpLi) const;
- /// RemoveCopiesFromValNo - The specified value# is defined by an implicit
- /// def and it is being removed. Turn all copies from this value# into
- /// identity copies so they will be removed.
- void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI);
+ /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
+ /// implicit_def and it is being removed. Turn all copies from this value#
+ /// into implicit_defs.
+ void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI);
/// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join a
/// virtual destination register with a physical source register.
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index ce63121251e3..919a0ce160f4 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -39,7 +39,8 @@ protected:
VirtRegMap *vrm;
/// Construct a spiller base.
- SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) :
+ SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
+ VirtRegMap *vrm) :
mf(mf), lis(lis), ls(ls), vrm(vrm)
{
mfi = mf->getFrameInfo();
@@ -47,16 +48,24 @@ protected:
tii = mf->getTarget().getInstrInfo();
}
- /// Insert a store of the given vreg to the given stack slot immediately
- /// after the given instruction. Returns the base index of the inserted
- /// instruction. The caller is responsible for adding an appropriate
- /// LiveInterval to the LiveIntervals analysis.
- unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
- unsigned newVReg,
- const TargetRegisterClass *trc) {
- MachineBasicBlock::iterator nextInstItr(mi);
- ++nextInstItr;
+ /// Ensures there is space before the given machine instruction and returns
+ /// the instruction's new number.
+ unsigned makeSpaceBefore(MachineInstr *mi) {
+ if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
+ lis->scaleNumbering(2);
+ ls->scaleNumbering(2);
+ }
+
+ unsigned miIdx = lis->getInstructionIndex(mi);
+ assert(lis->hasGapBeforeInstr(miIdx));
+
+ return miIdx;
+ }
+
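makeSpaceBefore (and makeSpaceAfter below) rely on LiveIntervals::scaleNumbering to open a slot: multiplying every instruction index by two leaves an unused odd index between formerly adjacent instructions. A toy illustration with invented index values:

    // before scaleNumbering(2):  A = 4, B = 5   (no free slot between A and B)
    // after  scaleNumbering(2):  A = 8, B = 10  (index 9 is now free for a
    //                                            spill load or store)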
+ /// Ensures there is space after the given machine instruction and returns
+ /// the instruction's new number.
+ unsigned makeSpaceAfter(MachineInstr *mi) {
if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
lis->scaleNumbering(2);
ls->scaleNumbering(2);
@@ -66,7 +75,24 @@ protected:
assert(lis->hasGapAfterInstr(miIdx));
- tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, newVReg,
+ return miIdx;
+ }
+
+
+ /// Insert a store of the given vreg to the given stack slot immediately
+ /// after the given instruction. Returns the base index of the inserted
+ /// instruction. The caller is responsible for adding an appropriate
+ /// LiveInterval to the LiveIntervals analysis.
+ unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
+
+ MachineBasicBlock::iterator nextInstItr(mi);
+ ++nextInstItr;
+
+ unsigned miIdx = makeSpaceAfter(mi);
+
+ tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg,
true, ss, trc);
MachineBasicBlock::iterator storeInstItr(mi);
++storeInstItr;
@@ -81,25 +107,35 @@ protected:
return storeInstIdx;
}
+ void insertStoreOnInterval(LiveInterval *li,
+ MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
+
+ unsigned storeInstIdx = insertStoreFor(mi, ss, vreg, trc);
+ unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
+ end = lis->getUseIndex(storeInstIdx);
+
+ VNInfo *vni =
+ li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator());
+ vni->kills.push_back(storeInstIdx);
+ LiveRange lr(start, end, vni);
+
+ li->addRange(lr);
+ }
+
/// Insert a load of the given vreg from the given stack slot immediately
/// before the given instruction. Returns the base index of the inserted
/// instruction. The caller is responsible for adding an appropriate
/// LiveInterval to the LiveIntervals analysis.
unsigned insertLoadFor(MachineInstr *mi, unsigned ss,
- unsigned newVReg,
+ unsigned vreg,
const TargetRegisterClass *trc) {
MachineBasicBlock::iterator useInstItr(mi);
-
- if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
- lis->scaleNumbering(2);
- ls->scaleNumbering(2);
- }
-
- unsigned miIdx = lis->getInstructionIndex(mi);
-
- assert(lis->hasGapBeforeInstr(miIdx));
-
- tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, newVReg, ss, trc);
+
+ unsigned miIdx = makeSpaceBefore(mi);
+
+ tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(mi);
--loadInstItr;
MachineInstr *loadInst = &*loadInstItr;
@@ -113,6 +149,24 @@ protected:
return loadInstIdx;
}
+ void insertLoadOnInterval(LiveInterval *li,
+ MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
+
+ unsigned loadInstIdx = insertLoadFor(mi, ss, vreg, trc);
+ unsigned start = lis->getDefIndex(loadInstIdx),
+ end = lis->getUseIndex(lis->getInstructionIndex(mi));
+
+ VNInfo *vni =
+ li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator());
+ vni->kills.push_back(lis->getInstructionIndex(mi));
+ LiveRange lr(start, end, vni);
+
+ li->addRange(lr);
+ }
+
+
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding is
@@ -173,35 +227,16 @@ protected:
assert(hasUse || hasDef);
if (hasUse) {
- unsigned loadInstIdx = insertLoadFor(mi, ss, newVReg, trc);
- unsigned start = lis->getDefIndex(loadInstIdx),
- end = lis->getUseIndex(lis->getInstructionIndex(mi));
-
- VNInfo *vni =
- newLI->getNextValue(loadInstIdx, 0, lis->getVNInfoAllocator());
- vni->kills.push_back(lis->getInstructionIndex(mi));
- LiveRange lr(start, end, vni);
-
- newLI->addRange(lr);
+ insertLoadOnInterval(newLI, mi, ss, newVReg, trc);
}
if (hasDef) {
- unsigned storeInstIdx = insertStoreFor(mi, ss, newVReg, trc);
- unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
- end = lis->getUseIndex(storeInstIdx);
-
- VNInfo *vni =
- newLI->getNextValue(storeInstIdx, 0, lis->getVNInfoAllocator());
- vni->kills.push_back(storeInstIdx);
- LiveRange lr(start, end, vni);
-
- newLI->addRange(lr);
+ insertStoreOnInterval(newLI, mi, ss, newVReg, trc);
}
added.push_back(newLI);
}
-
return added;
}
@@ -212,13 +247,44 @@ protected:
/// folding.
class TrivialSpiller : public SpillerBase {
public:
- TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) :
+
+ TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
+ VirtRegMap *vrm) :
SpillerBase(mf, lis, ls, vrm) {}
std::vector<LiveInterval*> spill(LiveInterval *li) {
return trivialSpillEverywhere(li);
}
+ std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, VNInfo *valno) {
+ std::vector<LiveInterval*> spillIntervals;
+ MachineBasicBlock::iterator storeInsertPoint;
+
+ if (valno->isDefAccurate()) {
+ // If we have an accurate def we can just grab an iterator to the instr
+ // after the def.
+ storeInsertPoint =
+ next(MachineBasicBlock::iterator(lis->getInstructionFromIndex(valno->def)));
+ } else {
+ // If the def info isn't accurate we check if this is a PHI def.
+ // If it is then def holds the index of the defining Basic Block, and we
+ // can use that to get an insertion point.
+ if (valno->isPHIDef()) {
+ // FIXME: use the defining block's index in valno->def to pick an
+ // insertion point; not yet implemented.
+ } else {
+ // We have no usable def info. We can't split this value sensibly.
+ // FIXME: Need sensible feedback for "failure to split", an empty
+ // set of spill intervals could be reasonably returned from a
+ // split where both the store and load are folded.
+ return spillIntervals;
+ }
+ }
+
+
+
+ return spillIntervals;
+ }
+
};
}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index cad054d744c4..9c3900df0b57 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -13,11 +13,14 @@
#include <vector>
namespace llvm {
+
class LiveInterval;
class LiveIntervals;
class LiveStacks;
class MachineFunction;
+ class MachineInstr;
class VirtRegMap;
+ class VNInfo;
/// Spiller interface.
///
@@ -26,7 +29,15 @@ namespace llvm {
class Spiller {
public:
virtual ~Spiller() = 0;
+
+ /// Spill the given live range. The method used will depend on the Spiller
+ /// implementation selected.
virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0;
+
+ /// Intra-block split.
+ virtual std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li,
+ VNInfo *valno) = 0;
+
};
/// Create and return a spiller object, as specified on the command line.
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index a2c12554f377..ca9952863b7c 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -827,7 +827,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
// Add a live range for the new vreg
LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
VNInfo* FirstVN = *Int.vni_begin();
- FirstVN->hasPHIKill = false;
+ FirstVN->setHasPHIKill(false);
if (I->getOperand(i).isKill())
FirstVN->kills.push_back(
LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
@@ -886,10 +886,7 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
VNInfo* OldVN = R.valno;
VNInfo*& NewVN = VNMap[OldVN];
if (!NewVN) {
- NewVN = LHS.getNextValue(OldVN->def,
- OldVN->copy,
- LI.getVNInfoAllocator());
- NewVN->kills = OldVN->kills;
+ NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator());
}
LiveRange LR (R.start, R.end, NewVN);
@@ -987,7 +984,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
LiveInterval& Int = LI.getOrCreateInterval(I->first);
const LiveRange* LR =
Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
- LR->valno->hasPHIKill = true;
+ LR->valno->setHasPHIKill(true);
I->second.erase(SI->first);
}
@@ -1037,7 +1034,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
// now has an unknown def.
unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
const LiveRange* PLR = PI.getLiveRangeContaining(idx);
- PLR->valno->def = ~0U;
+ PLR->valno->setIsPHIDef(true);
LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
PLR->start, PLR->valno);
PI.addRange(R);
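This hunk shows the pattern applied throughout the patch: the old magic sentinel values stored in VNInfo::def give way to explicit flag accessors. As a summary sketch:

    //   old encoding              new API
    //   VNI->def == ~0U      -->  VNI->isPHIDef()
    //   VNI->def == ~1U      -->  VNI->isUnused()
    //   ordinary def index   -->  VNI->isDefAccurate() is true
    PLR->valno->setIsPHIDef(true);   // instead of: PLR->valno->def = ~0U;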
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 29637b954f0b..4d3417fdff51 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -51,6 +51,7 @@ static RegisterPass<VirtRegMap>
X("virtregmap", "Virtual Register Map");
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
TII = mf.getTarget().getInstrInfo();
TRI = mf.getTarget().getRegisterInfo();
MF = &mf;
@@ -98,6 +99,18 @@ void VirtRegMap::grow() {
ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
}
+unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
+ unsigned physReg = Hint.second;
+ if (physReg &&
+ TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+ physReg = getPhys(physReg);
+ if (Hint.first == 0)
+ return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg))
+ ? physReg : 0;
+ return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
+}
+
int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
assert(TargetRegisterInfo::isVirtualRegister(virtReg));
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
@@ -213,8 +226,7 @@ void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
/// FindUnusedRegisters - Gather a list of allocatable registers that
/// have not been allocated to any virtual register.
-bool VirtRegMap::FindUnusedRegisters(const TargetRegisterInfo *TRI,
- LiveIntervals* LIs) {
+bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
unsigned NumRegs = TRI->getNumRegs();
UnusedRegs.reset();
UnusedRegs.resize(NumRegs);
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 507557d24c08..fe767b7671e1 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -31,6 +31,7 @@ namespace llvm {
class LiveIntervals;
class MachineInstr;
class MachineFunction;
+ class MachineRegisterInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
@@ -47,6 +48,7 @@ namespace llvm {
std::pair<unsigned, ModRef> > MI2VirtMapTy;
private:
+ MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineFunction *MF;
@@ -190,6 +192,9 @@ namespace llvm {
grow();
}
+ /// @brief returns the register allocation preference.
+ unsigned getRegAllocPref(unsigned virtReg);
+
/// @brief records virtReg is a split live interval from SReg.
void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
Virt2SplitMap[virtReg] = SReg;
@@ -445,8 +450,7 @@ namespace llvm {
/// FindUnusedRegisters - Gather a list of allocatable registers that
/// have not been allocated to any virtual register.
- bool FindUnusedRegisters(const TargetRegisterInfo *TRI,
- LiveIntervals* LIs);
+ bool FindUnusedRegisters(LiveIntervals* LIs);
/// HasUnusedRegisters - Return true if there are any allocatable registers
/// that have not been allocated to any virtual register.
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 83397a586d53..401a22647e1d 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -118,7 +118,7 @@ int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
char **OutError) {
std::string Error;
if (ExecutionEngine *JIT =
- ExecutionEngine::createJIT(unwrap(MP), &Error, 0,
+ ExecutionEngine::create(unwrap(MP), false, &Error,
(CodeGenOpt::Level)OptLevel)) {
*OutJIT = wrap(JIT);
return 0;
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 7c8ce706b610..e7a76cc3f236 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_library(LLVMSupport
PrettyStackTrace.cpp
SlowOperationInformer.cpp
SmallPtrSet.cpp
+ SourceMgr.cpp
Statistic.cpp
Streams.cpp
StringExtras.cpp
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 6de65752b3d6..4e655a0f9eec 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -14,18 +14,15 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Config/config.h"
#include "llvm/System/Atomic.h"
-#include "llvm/System/Mutex.h"
#include <cassert>
using namespace llvm;
static const ManagedStaticBase *StaticList = 0;
-static sys::Mutex* ManagedStaticMutex = 0;
-
void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
void (*Deleter)(void*)) const {
- if (ManagedStaticMutex) {
- ManagedStaticMutex->acquire();
+ if (llvm_is_multithreaded()) {
+ llvm_acquire_global_lock();
if (Ptr == 0) {
void* tmp = Creator ? Creator() : 0;
@@ -39,7 +36,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
StaticList = this;
}
- ManagedStaticMutex->release();
+ llvm_release_global_lock();
} else {
assert(Ptr == 0 && DeleterFn == 0 && Next == 0 &&
"Partially initialized ManagedStatic!?");
@@ -68,24 +65,11 @@ void ManagedStaticBase::destroy() const {
DeleterFn = 0;
}
-bool llvm::llvm_start_multithreaded() {
-#if LLVM_MULTITHREADED
- assert(ManagedStaticMutex == 0 && "Multithreaded LLVM already initialized!");
- ManagedStaticMutex = new sys::Mutex(true);
- return true;
-#else
- return false;
-#endif
-}
-
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
void llvm::llvm_shutdown() {
while (StaticList)
StaticList->destroy();
- if (ManagedStaticMutex) {
- delete ManagedStaticMutex;
- ManagedStaticMutex = 0;
- }
+ if (llvm_is_multithreaded()) llvm_stop_multithreaded();
}
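RegisterManagedStatic now defers to the global lock from llvm/System/Threading.h instead of owning a mutex. A minimal sketch of the pattern, where createObject stands in for an arbitrary factory:

    #include "llvm/System/Threading.h"
    using namespace llvm;

    void *lazyInit(void *&Ptr, void *(*createObject)()) {
      if (llvm_is_multithreaded()) {
        llvm_acquire_global_lock();
        if (Ptr == 0)                // re-check under the lock
          Ptr = createObject();
        llvm_release_global_lock();
      } else if (Ptr == 0) {
        Ptr = createObject();        // single-threaded: no lock needed
      }
      return Ptr;
    }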
diff --git a/utils/TableGen/TGSourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 42bc75246c9f..d789f1010be2 100644
--- a/utils/TableGen/TGSourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -1,4 +1,4 @@
-//===- TGSourceMgr.cpp - Manager for Source Buffers & Diagnostics ---------===//
+//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,25 +7,47 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the TGSourceMgr class.
+// This file implements the SourceMgr class. This class is used as a simple
+// substrate for diagnostics, #include handling, and other low level things for
+// simple parsers.
//
//===----------------------------------------------------------------------===//
-#include "TGSourceMgr.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-TGSourceMgr::~TGSourceMgr() {
+SourceMgr::~SourceMgr() {
while (!Buffers.empty()) {
delete Buffers.back().Buffer;
Buffers.pop_back();
}
}
+/// AddIncludeFile - Search for a file with the specified name in the current
+/// directory or in one of the IncludeDirs. If no file is found, this returns
+/// ~0, otherwise it returns the buffer ID of the stacked file.
+unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
+ SMLoc IncludeLoc) {
+
+ MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str());
+
+ // If the file didn't exist directly, see if it's in an include path.
+ for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
+ std::string IncFile = IncludeDirectories[i] + "/" + Filename;
+ NewBuf = MemoryBuffer::getFile(IncFile.c_str());
+ }
+
+ if (NewBuf == 0) return ~0U;
+
+ return AddNewSourceBuffer(NewBuf, IncludeLoc);
+}
+
+
/// FindBufferContainingLoc - Return the ID of the buffer containing the
/// specified location, returning -1 if not found.
-int TGSourceMgr::FindBufferContainingLoc(TGLoc Loc) const {
+int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
// Use <= here so that a pointer to the null at the end of the buffer
@@ -37,7 +59,7 @@ int TGSourceMgr::FindBufferContainingLoc(TGLoc Loc) const {
/// FindLineNumber - Find the line number for the specified location in the
/// specified file. This is not a fast method.
-unsigned TGSourceMgr::FindLineNumber(TGLoc Loc, int BufferID) const {
+unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const {
if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc);
assert(BufferID != -1 && "Invalid Location!");
@@ -49,13 +71,13 @@ unsigned TGSourceMgr::FindLineNumber(TGLoc Loc, int BufferID) const {
const char *Ptr = Buff->getBufferStart();
- for (; TGLoc::getFromPointer(Ptr) != Loc; ++Ptr)
+ for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
if (*Ptr == '\n') ++LineNo;
return LineNo;
}
-void TGSourceMgr::PrintIncludeStack(TGLoc IncludeLoc) const {
- if (IncludeLoc == TGLoc()) return; // Top of stack.
+void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc) const {
+ if (IncludeLoc == SMLoc()) return; // Top of stack.
int CurBuf = FindBufferContainingLoc(IncludeLoc);
assert(CurBuf != -1 && "Invalid or unspecified location!");
@@ -68,12 +90,12 @@ void TGSourceMgr::PrintIncludeStack(TGLoc IncludeLoc) const {
}
-void TGSourceMgr::PrintError(TGLoc ErrorLoc, const std::string &Msg) const {
+void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg) const {
raw_ostream &OS = errs();
// First thing to do: find the current buffer containing the specified
// location.
- int CurBuf = FindBufferContainingLoc(ErrorLoc);
+ int CurBuf = FindBufferContainingLoc(Loc);
assert(CurBuf != -1 && "Invalid or unspecified location!");
PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc);
@@ -82,24 +104,24 @@ void TGSourceMgr::PrintError(TGLoc ErrorLoc, const std::string &Msg) const {
OS << "Parsing " << CurMB->getBufferIdentifier() << ":"
- << FindLineNumber(ErrorLoc, CurBuf) << ": ";
+ << FindLineNumber(Loc, CurBuf) << ": ";
OS << Msg << "\n";
// Scan backward to find the start of the line.
- const char *LineStart = ErrorLoc.getPointer();
+ const char *LineStart = Loc.getPointer();
while (LineStart != CurMB->getBufferStart() &&
LineStart[-1] != '\n' && LineStart[-1] != '\r')
--LineStart;
// Get the end of the line.
- const char *LineEnd = ErrorLoc.getPointer();
+ const char *LineEnd = Loc.getPointer();
while (LineEnd != CurMB->getBufferEnd() &&
LineEnd[0] != '\n' && LineEnd[0] != '\r')
++LineEnd;
// Print out the line.
OS << std::string(LineStart, LineEnd) << "\n";
// Print out spaces before the caret.
- for (const char *Pos = LineStart; Pos != ErrorLoc.getPointer(); ++Pos)
+ for (const char *Pos = LineStart; Pos != Loc.getPointer(); ++Pos)
OS << (*Pos == '\t' ? '\t' : ' ');
OS << "^\n";
}
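With the move out of TableGen, any simple parser can use SourceMgr for include handling and caret diagnostics. A hedged usage sketch (the file names are invented, and setIncludeDirs is assumed to be the setter for IncludeDirectories):

    #include "llvm/Support/SourceMgr.h"
    using namespace llvm;

    SourceMgr SrcMgr;
    std::vector<std::string> Dirs;
    Dirs.push_back("include");
    SrcMgr.setIncludeDirs(Dirs);
    unsigned BufID = SrcMgr.AddIncludeFile("input.td", SMLoc());
    if (BufID == ~0U) {
      // not found in the current directory or any include directory
    }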
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index e8cf69d81ff1..dd5c3d61c27b 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -43,6 +43,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
switch (Kind) {
case UnknownOS: return "unknown";
+ case AuroraUX: return "auroraux";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
@@ -79,7 +80,9 @@ void Triple::Parse() const {
Vendor = UnknownVendor;
std::string OSName = getOSName();
- if (memcmp(&OSName[0], "darwin", 6) == 0)
+ if (memcmp(&OSName[0], "auroraux", 8) == 0)
+ OS = AuroraUX;
+ else if (memcmp(&OSName[0], "darwin", 6) == 0)
OS = Darwin;
else if (memcmp(&OSName[0], "dragonfly", 9) == 0)
OS = DragonFly;
diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp
index 2827d8896594..416f981df827 100644
--- a/lib/System/Atomic.cpp
+++ b/lib/System/Atomic.cpp
@@ -51,3 +51,31 @@ sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
# error No compare-and-swap implementation for your platform!
#endif
}
+
+sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
+#if LLVM_MULTITHREADED==0
+ ++(*ptr);
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_add_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedIncrement(ptr);
+#else
+# error No atomic increment implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
+#if LLVM_MULTITHREADED==0
+ --(*ptr);
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_sub_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedDecrement(ptr);
+#else
+# error No atomic decrement implementation for your platform!
+#endif
+}
+
+
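Both new primitives return the post-operation value, matching __sync_add_and_fetch/__sync_sub_and_fetch. That makes a reference count a one-liner; a minimal sketch:

    #include "llvm/System/Atomic.h"
    using namespace llvm;

    static sys::cas_flag RefCount = 0;

    void retain()  { sys::AtomicIncrement(&RefCount); }
    void release() {
      if (sys::AtomicDecrement(&RefCount) == 0) {
        // last reference dropped; safe to tear down
      }
    }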
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
index 5415dd6e300c..a5a56e832dc2 100644
--- a/lib/System/CMakeLists.txt
+++ b/lib/System/CMakeLists.txt
@@ -10,7 +10,9 @@ add_llvm_library(LLVMSystem
Path.cpp
Process.cpp
Program.cpp
+ RWMutex.cpp
Signals.cpp
+ Threading.cpp
TimeValue.cpp
)
diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp
index d95c25b39eef..a5e9920ae3f0 100644
--- a/lib/System/Mutex.cpp
+++ b/lib/System/Mutex.cpp
@@ -23,11 +23,11 @@
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
-Mutex::Mutex( bool recursive) { }
-Mutex::~Mutex() { }
-bool Mutex::acquire() { return true; }
-bool Mutex::release() { return true; }
-bool Mutex::tryacquire() { return true; }
+MutexImpl::MutexImpl( bool recursive) { }
+MutexImpl::~MutexImpl() { }
+bool MutexImpl::acquire() { return true; }
+bool MutexImpl::release() { return true; }
+bool MutexImpl::tryacquire() { return true; }
}
#else
@@ -55,7 +55,7 @@ using namespace sys;
static const bool pthread_enabled = true;
// Construct a Mutex using pthread calls
-Mutex::Mutex( bool recursive)
+MutexImpl::MutexImpl( bool recursive)
: data_(0)
{
if (pthread_enabled)
@@ -94,7 +94,7 @@ Mutex::Mutex( bool recursive)
}
// Destruct a Mutex
-Mutex::~Mutex()
+MutexImpl::~MutexImpl()
{
if (pthread_enabled)
{
@@ -106,7 +106,7 @@ Mutex::~Mutex()
}
bool
-Mutex::acquire()
+MutexImpl::acquire()
{
if (pthread_enabled)
{
@@ -120,7 +120,7 @@ Mutex::acquire()
}
bool
-Mutex::release()
+MutexImpl::release()
{
if (pthread_enabled)
{
@@ -134,7 +134,7 @@ Mutex::release()
}
bool
-Mutex::tryacquire()
+MutexImpl::tryacquire()
{
if (pthread_enabled)
{
diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp
new file mode 100644
index 000000000000..15d98cb8f418
--- /dev/null
+++ b/lib/System/RWMutex.cpp
@@ -0,0 +1,175 @@
+//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/System/RWMutex.h"
+#include <cstring>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+RWMutexImpl::RWMutexImpl() { }
+RWMutexImpl::~RWMutexImpl() { }
+bool RWMutexImpl::reader_acquire() { return true; }
+bool RWMutexImpl::reader_release() { return true; }
+bool RWMutexImpl::writer_acquire() { return true; }
+bool RWMutexImpl::writer_release() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_rwlock_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_rwlock_init does have an address, then rwlock support is enabled.
+// Note: all LLVM tools will link against -lpthread if it's available since it
+// is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_rwlock_init is not
+// declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a RWMutex using pthread calls
+RWMutexImpl::RWMutexImpl()
+ : data_(0)
+{
+ if (pthread_enabled)
+ {
+ // Declare the pthread_rwlock data structures
+ pthread_rwlock_t* rwlock =
+ static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+
+#ifdef __APPLE__
+ // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+ bzero(rwlock, sizeof(pthread_rwlock_t));
+#endif
+
+ pthread_rwlockattr_t attr;
+
+ // Initialize the rwlock attributes
+ int errorcode = pthread_rwlockattr_init(&attr);
+ assert(errorcode == 0);
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+ // Make it a process local rwlock
+ errorcode = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+#endif
+
+ // Initialize the rwlock
+ errorcode = pthread_rwlock_init(rwlock, &attr);
+ assert(errorcode == 0);
+
+ // Destroy the attributes
+ errorcode = pthread_rwlockattr_destroy(&attr);
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = rwlock;
+ }
+}
+
+// Destruct a RWMutex
+RWMutexImpl::~RWMutexImpl()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+ pthread_rwlock_destroy(rwlock);
+ free(rwlock);
+ }
+}
+
+bool
+RWMutexImpl::reader_acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_rdlock(rwlock);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+bool
+RWMutexImpl::reader_release()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+bool
+RWMutexImpl::writer_acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_wrlock(rwlock);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+bool
+RWMutexImpl::writer_release()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/RWMutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Win32/RWMutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/RWMutex.cpp
+#endif
+#endif
diff --git a/lib/System/Threading.cpp b/lib/System/Threading.cpp
new file mode 100644
index 000000000000..a2d7f82715d9
--- /dev/null
+++ b/lib/System/Threading.cpp
@@ -0,0 +1,63 @@
+//===-- llvm/System/Threading.cpp- Control multithreading mode --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Threading.h"
+#include "llvm/System/Atomic.h"
+#include "llvm/System/Mutex.h"
+#include <cassert>
+
+using namespace llvm;
+
+static bool multithreaded_mode = false;
+
+static sys::Mutex* global_lock = 0;
+
+bool llvm::llvm_start_multithreaded() {
+#ifdef LLVM_MULTITHREADED
+ assert(!multithreaded_mode && "Already multithreaded!");
+ multithreaded_mode = true;
+ global_lock = new sys::Mutex(true);
+
+ // We fence here to ensure that all initialization is complete BEFORE we
+ // return from llvm_start_multithreaded().
+ sys::MemoryFence();
+ return true;
+#else
+ return false;
+#endif
+}
+
+void llvm::llvm_stop_multithreaded() {
+#ifdef LLVM_MULTITHREADED
+ assert(multithreaded_mode && "Not currently multithreaded!");
+
+  // We fence here to ensure that all threaded operations are complete BEFORE we
+ // return from llvm_stop_multithreaded().
+ sys::MemoryFence();
+
+ multithreaded_mode = false;
+ delete global_lock;
+#endif
+}
+
+bool llvm::llvm_is_multithreaded() {
+ return multithreaded_mode;
+}
+
+void llvm::llvm_acquire_global_lock() {
+ if (multithreaded_mode) global_lock->acquire();
+}
+
+void llvm::llvm_release_global_lock() {
+ if (multithreaded_mode) global_lock->release();
+}
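The intended call discipline: enable multithreading before any concurrent use of LLVM and tear it down once all worker threads have finished. A minimal sketch:

    #include "llvm/System/Threading.h"

    int main() {
      // Returns false when LLVM was built without LLVM_MULTITHREADED.
      llvm::llvm_start_multithreaded();
      // ... spawn and join threads that use LLVM ...
      llvm::llvm_stop_multithreaded();
      return 0;
    }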
diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc
index 4a015a676fc7..10e7ecb75a5f 100644
--- a/lib/System/Unix/Mutex.inc
+++ b/lib/System/Unix/Mutex.inc
@@ -20,28 +20,28 @@ namespace llvm
{
using namespace sys;
-Mutex::Mutex( bool recursive)
+MutexImpl::MutexImpl( bool recursive)
{
}
-Mutex::~Mutex()
+MutexImpl::~MutexImpl()
{
}
bool
-Mutex::acquire()
+MutexImpl::acquire()
{
return true;
}
bool
-Mutex::release()
+MutexImpl::release()
{
return true;
}
bool
-Mutex::tryacquire( void )
+MutexImpl::tryacquire( void )
{
return true;
}
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index d5edee1b03b3..1f73571cf140 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -104,6 +104,14 @@ Path::isValid() const {
}
bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ if (NameLen == 0)
+ return false;
+ return NameStart[0] == '/';
+}
+
+bool
Path::isAbsolute() const {
if (path.empty())
return false;
diff --git a/lib/System/Unix/RWMutex.inc b/lib/System/Unix/RWMutex.inc
new file mode 100644
index 000000000000..e83d41ef4cfe
--- /dev/null
+++ b/lib/System/Unix/RWMutex.inc
@@ -0,0 +1,43 @@
+//= llvm/System/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() { }
+
+RWMutexImpl::~RWMutexImpl() { }
+
+bool RWMutexImpl::reader_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ return true;
+}
+
+}
diff --git a/lib/System/Unix/Unix.h b/lib/System/Unix/Unix.h
index 452226f4f79a..c2c06dd114e5 100644
--- a/lib/System/Unix/Unix.h
+++ b/lib/System/Unix/Unix.h
@@ -79,12 +79,19 @@ static inline bool MakeErrMsg(
return true;
char buffer[MAXPATHLEN];
buffer[0] = 0;
+ char* str = buffer;
if (errnum == -1)
errnum = errno;
#ifdef HAVE_STRERROR_R
// strerror_r is thread-safe.
if (errnum)
+# if defined(__GLIBC__) && defined(_GNU_SOURCE)
+ // glibc defines its own incompatible version of strerror_r
+ // which may not use the buffer supplied.
+ str = strerror_r(errnum,buffer,MAXPATHLEN-1);
+# else
strerror_r(errnum,buffer,MAXPATHLEN-1);
+# endif
#elif HAVE_STRERROR
// Copy the thread un-safe result of strerror into
// the buffer as fast as possible to minimize impact
@@ -97,7 +104,7 @@ static inline bool MakeErrMsg(
// but, oh well, just use a generic message
sprintf(buffer, "Error #%d", errnum);
#endif
- *ErrMsg = prefix + ": " + buffer;
+ *ErrMsg = prefix + ": " + str;
return true;
}
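The extra str pointer exists because the two strerror_r variants disagree. A sketch of the signatures the #if distinguishes:

    //   GNU (glibc, _GNU_SOURCE):
    //     char *strerror_r(int errnum, char *buf, size_t buflen);
    //     // may return a pointer to a static string and ignore buf
    //   POSIX/XSI:
    //     int strerror_r(int errnum, char *buf, size_t buflen);
    //     // always writes the message into buf
    // 'str' aliases 'buffer' in the POSIX case and takes the returned
    // pointer in the GNU case, so the final concatenation works for both.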
diff --git a/lib/System/Win32/Mutex.inc b/lib/System/Win32/Mutex.inc
index 7c1723be73fc..75f01fefacbb 100644
--- a/lib/System/Win32/Mutex.inc
+++ b/lib/System/Win32/Mutex.inc
@@ -22,13 +22,13 @@
namespace llvm {
using namespace sys;
-Mutex::Mutex(bool /*recursive*/)
+MutexImpl::MutexImpl(bool /*recursive*/)
{
data_ = new CRITICAL_SECTION;
InitializeCriticalSection((LPCRITICAL_SECTION)data_);
}
-Mutex::~Mutex()
+MutexImpl::~MutexImpl()
{
DeleteCriticalSection((LPCRITICAL_SECTION)data_);
delete (LPCRITICAL_SECTION)data_;
@@ -36,21 +36,21 @@ Mutex::~Mutex()
}
bool
-Mutex::acquire()
+MutexImpl::acquire()
{
EnterCriticalSection((LPCRITICAL_SECTION)data_);
return true;
}
bool
-Mutex::release()
+MutexImpl::release()
{
LeaveCriticalSection((LPCRITICAL_SECTION)data_);
return true;
}
bool
-Mutex::tryacquire()
+MutexImpl::tryacquire()
{
return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
}
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
index fbf8f6688a57..683c94bba44e 100644
--- a/lib/System/Win32/Path.inc
+++ b/lib/System/Win32/Path.inc
@@ -125,6 +125,20 @@ Path::isValid() const {
return true;
}
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ switch (NameLen) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return NameStart[0] == '/';
+ default:
+ return NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/');
+ }
+}
+
bool
Path::isAbsolute() const {
switch (path.length()) {
@@ -234,7 +248,9 @@ Path::GetCurrentDirectory() {
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
- return Path();
+ char pathname[MAX_PATH];
+ DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
+ return ret != MAX_PATH ? Path(pathname) : Path();
}
diff --git a/lib/System/Win32/RWMutex.inc b/lib/System/Win32/RWMutex.inc
new file mode 100644
index 000000000000..e2692269e3a0
--- /dev/null
+++ b/lib/System/Win32/RWMutex.inc
@@ -0,0 +1,58 @@
+//= llvm/System/Win32/RWMutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+
+// FIXME: Windows does not have reader-writer locks pre-Vista. If you want
+// real reader-writer locks, you need a pthreads implementation for Windows.
+
+namespace llvm {
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() {
+ data_ = calloc(1, sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+}
+
+RWMutexImpl::~RWMutexImpl() {
+ DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ free(data_);
+}
+
+bool RWMutexImpl::reader_acquire() {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+
+}
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 594811d6357f..9001e5033c7d 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -45,62 +45,72 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
// ARM Processors supported.
//
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+include "ARMSchedule.td"
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, GenericItineraries, Features>;
// V4 Processors.
-def : Proc<"generic", []>;
-def : Proc<"arm8", []>;
-def : Proc<"arm810", []>;
-def : Proc<"strongarm", []>;
-def : Proc<"strongarm110", []>;
-def : Proc<"strongarm1100", []>;
-def : Proc<"strongarm1110", []>;
+def : ProcNoItin<"generic", []>;
+def : ProcNoItin<"arm8", []>;
+def : ProcNoItin<"arm810", []>;
+def : ProcNoItin<"strongarm", []>;
+def : ProcNoItin<"strongarm110", []>;
+def : ProcNoItin<"strongarm1100", []>;
+def : ProcNoItin<"strongarm1110", []>;
// V4T Processors.
-def : Proc<"arm7tdmi", [ArchV4T]>;
-def : Proc<"arm7tdmi-s", [ArchV4T]>;
-def : Proc<"arm710t", [ArchV4T]>;
-def : Proc<"arm720t", [ArchV4T]>;
-def : Proc<"arm9", [ArchV4T]>;
-def : Proc<"arm9tdmi", [ArchV4T]>;
-def : Proc<"arm920", [ArchV4T]>;
-def : Proc<"arm920t", [ArchV4T]>;
-def : Proc<"arm922t", [ArchV4T]>;
-def : Proc<"arm940t", [ArchV4T]>;
-def : Proc<"ep9312", [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi", [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>;
+def : ProcNoItin<"arm710t", [ArchV4T]>;
+def : ProcNoItin<"arm720t", [ArchV4T]>;
+def : ProcNoItin<"arm9", [ArchV4T]>;
+def : ProcNoItin<"arm9tdmi", [ArchV4T]>;
+def : ProcNoItin<"arm920", [ArchV4T]>;
+def : ProcNoItin<"arm920t", [ArchV4T]>;
+def : ProcNoItin<"arm922t", [ArchV4T]>;
+def : ProcNoItin<"arm940t", [ArchV4T]>;
+def : ProcNoItin<"ep9312", [ArchV4T]>;
// V5T Processors.
-def : Proc<"arm10tdmi", [ArchV5T]>;
-def : Proc<"arm1020t", [ArchV5T]>;
+def : ProcNoItin<"arm10tdmi", [ArchV5T]>;
+def : ProcNoItin<"arm1020t", [ArchV5T]>;
// V5TE Processors.
-def : Proc<"arm9e", [ArchV5TE]>;
-def : Proc<"arm926ej-s", [ArchV5TE]>;
-def : Proc<"arm946e-s", [ArchV5TE]>;
-def : Proc<"arm966e-s", [ArchV5TE]>;
-def : Proc<"arm968e-s", [ArchV5TE]>;
-def : Proc<"arm10e", [ArchV5TE]>;
-def : Proc<"arm1020e", [ArchV5TE]>;
-def : Proc<"arm1022e", [ArchV5TE]>;
-def : Proc<"xscale", [ArchV5TE]>;
-def : Proc<"iwmmxt", [ArchV5TE]>;
+def : ProcNoItin<"arm9e", [ArchV5TE]>;
+def : ProcNoItin<"arm926ej-s", [ArchV5TE]>;
+def : ProcNoItin<"arm946e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm966e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm968e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm10e", [ArchV5TE]>;
+def : ProcNoItin<"arm1020e", [ArchV5TE]>;
+def : ProcNoItin<"arm1022e", [ArchV5TE]>;
+def : ProcNoItin<"xscale", [ArchV5TE]>;
+def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
// V6 Processors.
-def : Proc<"arm1136j-s", [ArchV6]>;
-def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
-def : Proc<"arm1176jz-s", [ArchV6]>;
-def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
-def : Proc<"mpcorenovfp", [ArchV6]>;
-def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1136j-s", V6Itineraries,
+ [ArchV6]>;
+def : Processor<"arm1136jf-s", V6Itineraries,
+ [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1176jz-s", V6Itineraries,
+ [ArchV6]>;
+def : Processor<"arm1176jzf-s", V6Itineraries,
+ [ArchV6, FeatureVFP2]>;
+def : Processor<"mpcorenovfp", V6Itineraries,
+ [ArchV6]>;
+def : Processor<"mpcore", V6Itineraries,
+ [ArchV6, FeatureVFP2]>;
// V6T2 Processors.
-def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>;
-def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s", V6Itineraries,
+ [ArchV6T2, FeatureThumb2]>;
+def : Processor<"arm1156t2f-s", V6Itineraries,
+ [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
// V7 Processors.
-def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
-def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : ProcNoItin<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index f126760cd5e9..47151e667c40 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -17,11 +17,6 @@ class CCIfSubtarget<string F, CCAction A>:
class CCIfAlign<string Align, CCAction A>:
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
-/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or
-/// "Soft".
-class CCIfFloatABI<string ABIType, CCAction A>:
- CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>;
-
//===----------------------------------------------------------------------===//
// ARM APCS Calling Convention
//===----------------------------------------------------------------------===//
@@ -105,25 +100,3 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
S9, S10, S11, S12, S13, S14, S15]>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
-
-//===----------------------------------------------------------------------===//
-// ARM Calling Convention Dispatch
-//===----------------------------------------------------------------------===//
-
-def CC_ARM : CallingConv<[
- CCIfSubtarget<"isAAPCS_ABI()",
- CCIfSubtarget<"hasVFP2()",
- CCIfFloatABI<"Hard",
- CCDelegateTo<CC_ARM_AAPCS_VFP>>>>,
- CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
- CCDelegateTo<CC_ARM_APCS>
-]>;
-
-def RetCC_ARM : CallingConv<[
- CCIfSubtarget<"isAAPCS_ABI()",
- CCIfSubtarget<"hasVFP2()",
- CCIfFloatABI<"Hard",
- CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>,
- CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
- CCDelegateTo<RetCC_ARM_APCS>
-]>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 44fac12019b3..f6629fe3c875 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -900,6 +900,10 @@ void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// Set first operand
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ // Skip LDRD and STRD's second operand.
+ if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD)
+ ++OpIdx;
+
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index ca3a9cb40329..1ed9e8080a20 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -52,8 +52,13 @@ public:
virtual const char *getPassName() const {
return "ARM Instruction Selection";
- }
-
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
SDNode *Select(SDValue Op);
virtual void InstructionSelect();
bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base,
@@ -84,6 +89,9 @@ public:
bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
SDValue &OffImm);
+ bool SelectShifterOperand(SDValue Op, SDValue N,
+ SDValue &BaseReg, SDValue &Opc);
+
bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
SDValue &B, SDValue &C);
@@ -509,8 +517,30 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
return false;
}
+bool ARMDAGToDAGISel::SelectShifterOperand(SDValue Op,
+ SDValue N,
+ SDValue &BaseReg,
+ SDValue &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)))
+ ShImmVal = RHS->getZExtValue() & 31;
+ else
+ return false;
+
+ Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+
+ return true;
+}
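+
+// For example, (srl GPR:$r, (i32 2)) is matched above with BaseReg = $r and
+// Opc encoding "lsr #2"; a shift by a non-constant amount is rejected, since
+// Thumb2 shifted operands do not allow register-controlled shift amounts.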
+
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
- SDValue N,
+ SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
@@ -549,6 +579,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
+ // ARMv6T2 and later should materialize immediates via MOV / MOVT pairs.
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasThumb2())
+ break;
+
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
if (Subtarget->isThumb())
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ec8bd1f8b31a..2443625d4bdb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -292,6 +292,25 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
+ if (!Subtarget->isThumb()) {
+ // Use branch latency information to determine if-conversion limits.
+ // FIXME: If-converter should use instruction latency of the branch being
+ // eliminated to compute the threshold. For ARMv6, the branch "latency"
+ // varies depending on whether it's dynamically or statically predicted
+ // and on whether the destination is in the prefetch buffer.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData();
+ unsigned Latency =
+ InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass());
+ if (Latency > 1) {
+ setIfCvtBlockSizeLimit(Latency-1);
+ if (Latency > 2)
+ setIfCvtDupBlockSizeLimit(Latency-2);
+ } else {
+ setIfCvtBlockSizeLimit(10);
+ setIfCvtDupBlockSizeLimit(2);
+ }
+ }
+
maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
// Do not enable CodePlacementOpt for now: it currently runs after the
// ARMConstantIslandPass and messes up branch relaxation and placement
@@ -415,7 +434,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
ARM::NoRegister };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4);
- if (Reg == 0)
+ if (Reg == 0)
return false; // we didn't handle it
unsigned i;
@@ -487,6 +506,33 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
+/// CallingConvention value.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
+ bool Return) const {
+ switch (CC) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+}
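+
+// For example, a CallingConv::C call on an AAPCS target with VFP2 and a hard
+// float ABI resolves to CC_ARM_AAPCS_VFP (RetCC_ARM_AAPCS_VFP for results);
+// the same call on an APCS target falls back to CC_ARM_APCS.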
+
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
@@ -501,7 +547,8 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
SmallVector<CCValAssign, 16> RVLocs;
bool isVarArg = TheCall->isVarArg();
CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
- CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM);
+ CCInfo.AnalyzeCallResult(TheCall,
+ CCAssignFnForNode(CallingConv, /* Return*/ true));
SmallVector<SDValue, 8> ResultVals;
@@ -586,8 +633,6 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
MVT RetVT = TheCall->getRetValType(0);
SDValue Chain = TheCall->getChain();
unsigned CC = TheCall->getCallingConv();
- assert((CC == CallingConv::C ||
- CC == CallingConv::Fast) && "unknown calling convention");
bool isVarArg = TheCall->isVarArg();
SDValue Callee = TheCall->getCallee();
DebugLoc dl = TheCall->getDebugLoc();
@@ -595,7 +640,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(TheCall, CC_ARM);
+ CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -788,7 +833,7 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
// Analyze return values of ISD::RET.
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM);
+ CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true));
// If this is the first return lowered for this function, add
// the regs to the liveout set for the function.
@@ -1085,7 +1130,8 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM);
+ CCInfo.AnalyzeFormalArguments(Op.getNode(),
+ CCAssignFnForNode(CC, /* Return*/ false));
SmallVector<SDValue, 16> ArgValues;
@@ -1456,7 +1502,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces())
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 2dab2dbba663..8f53e396eaaf 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -151,6 +151,7 @@ namespace llvm {
///
unsigned ARMPCLabelIndex;
+ CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const;
SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
const SDValue &StackPtr, const CCValAssign &VA,
SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags);
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 4b0dbb5dacdb..d19fb8eea87f 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -697,7 +697,6 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
- MBB.insert(MI, PopMI);
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (Reg == ARM::LR) {
@@ -706,10 +705,15 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
PopMI->setDesc(get(ARM::tPOP_RET));
- MBB.erase(MI);
+ MI = MBB.erase(MI);
}
PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
}
+
+ // It's illegal to emit a pop instruction with no operands.
+ if (PopMI->getNumOperands() > 0)
+ MBB.insert(MI, PopMI);
+
return true;
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index cc9f1a5759d0..4707e3b7a97f 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -90,12 +90,12 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def HasThumb2 : Predicate<"Subtarget->hasThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -539,7 +539,7 @@ let isReturn = 1, isTerminator = 1 in
LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
[]>;
-let isCall = 1,
+let isCall = 1, Itinerary = IIC_Br,
Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in {
def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
@@ -567,7 +567,7 @@ let isCall = 1,
}
}
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
let isPredicable = 1 in
@@ -647,9 +647,8 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
let mayLoad = 1 in {
// Load doubleword
-def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- "ldr", "d $dst, $addr",
- []>, Requires<[IsARM, HasV5T]>;
+def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>;
// Indexed loads
def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
@@ -709,9 +708,8 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
// Store doubleword
let mayStore = 1 in
-def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
- "str", "d $src, $addr",
- []>, Requires<[IsARM, HasV5T]>;
+def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm,
+ "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>;
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
@@ -1387,6 +1385,12 @@ def : ARMV5TEPat<(add GPR:$acc,
include "ARMInstrThumb.td"
//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
// Floating Point Support
//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 54232f6dfe6a..9297f08d800b 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -277,6 +277,7 @@ def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops),
//
// Add with carry
+let isCommutable = 1 in
def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"adc $dst, $rhs",
[(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
@@ -311,6 +312,7 @@ def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
"add $dst, $rhs * 4", []>;
+let isCommutable = 1 in
def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"and $dst, $rhs",
[(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
@@ -358,6 +360,7 @@ def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
// TODO: A7-37: CMP(3) - cmp hi regs
+let isCommutable = 1 in
def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"eor $dst, $rhs",
[(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
@@ -399,6 +402,7 @@ def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
"cpy $dst, $src\t@ hir2hir", []>;
} // neverHasSideEffects
+let isCommutable = 1 in
def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"mul $dst, $rhs",
[(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
@@ -411,6 +415,7 @@ def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src),
"neg $dst, $src",
[(set tGPR:$dst, (ineg tGPR:$src))]>;
+let isCommutable = 1 in
def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"orr $dst, $rhs",
[(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 168fb45f11ea..07c71da46d62 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -10,3 +10,199 @@
// This file describes the Thumb2 instruction set.
//
//===----------------------------------------------------------------------===//
+
+// Shifted operands. No register-controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let PrintMethod = "printSOOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: return the low 16 bits of the immediate.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: return the high 16 bits of the immediate.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def imm16high : PatLeaf<(i32 imm), [{
+ // Returns true if all bits out of the [31..16] range are 0.
+ return ((N->getZExtValue() & 0xFFFF0000ULL) == N->getZExtValue());
+}], HI16>;
+
+def imm16high0xffff : PatLeaf<(i32 imm), [{
+ // Returns true if the low 16 bits of the value are all ones.
+ return ((N->getZExtValue() & 0x0000FFFFULL) == 0xFFFFULL);
+}], HI16>;
+
+def imm0_4095 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 4096;
+}]>;
+
+def imm0_4095_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 4096;
+}], imm_neg_XFORM>;
+
+def imm0_65535 : PatLeaf<(i32 imm), [{
+ return N->getZExtValue() < 65536;
+}]>;
+
+// A6.3.2 Modified immediate constants in Thumb instructions (#<const>)
+// FIXME: Move this to the addrmode matcher code.
+def t2_so_imm : PatLeaf<(i32 imm), [{
+ uint64_t v = N->getZExtValue();
+ if (v == 0 || v > 0xffffffffUL) return false;
+ // variant1 - 0b0000x - an 8-bit value, possibly zero (not supported for now)
+
+ // variant2 - 0b00nnx - an 8-bit value repeated across the 32-bit word
+ unsigned hi16 = (unsigned)(v >> 16);
+ unsigned lo16 = (unsigned)(v & 0xffffUL);
+ bool valid = (hi16 == lo16) && (
+ (v & 0x00ff00ffUL) == 0 || // type 0001x
+ (v & 0xff00ff00UL) == 0 || // type 0010x
+ ((lo16 >> 8) == (lo16 & 0xff))); // type 0011x
+ if (valid) return true;
+
+ // variant3 - 0b01000..0b11111 - an 8-bit value shifted within the 32-bit word
+ unsigned shift = CountLeadingZeros_32(v);
+ uint64_t mask = (0xff000000ULL >> shift);
+ // If valid, it is type 01000 + shift
+ return ((shift < 24) && (v & mask) > 0) && ((v & (~mask)) == 0);
+}]>;
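+
+// As worked examples of the check above: 0x00ab00ab is accepted (hi16 ==
+// lo16 with bytes 1 and 3 clear, type 0010x), 0x0003fc00 is accepted (a
+// single 8-bit chunk shifted into place), while 0x00ab00cd matches neither
+// form and is rejected.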
+
+
+//===----------------------------------------------------------------------===//
+// Thumb-2 instructions, covering the functionality of the ARM instruction set.
+//
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|reg|so_reg}) patterns for a
+/// binary operation that produces a value.
+multiclass T2I_bin_irs<string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ !strconcat(opc, " $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ Requires<[HasThumb2]>;
+ // register
+ def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ !strconcat(opc, " $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ Requires<[HasThumb2]>;
+ // shifted register
+ def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+ !strconcat(opc, " $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ Requires<[HasThumb2]>;
+}
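+
+// For example, "defm t2ADD : T2I_bin_irs<...>" below expands to t2ADDri,
+// t2ADDrr and t2ADDrs, covering "add rD, rN, #imm", "add rD, rN, rM" and
+// "add rD, rN, rM, lsl #imm" respectively.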
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ !strconcat(opc, "s $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+ // register
+ def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ !strconcat(opc, "s $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+ // shifted register
+ def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+ !strconcat(opc, "s $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ Requires<[HasThumb2]>;
+}
+}
+
+/// T2I_bin_c_irs - Similar to T2I_bin_irs except the instructions read the
+/// carry flag in CPSR and can optionally set it via the 's' bit.
+let Uses = [CPSR] in {
+multiclass T2I_bin_c_irs<string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+ // register
+ def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+ // shifted register
+ def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ Requires<[HasThumb2]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+def tMOVi16 : PseudoInst<(outs GPR:$dst), (ins i32imm:$src),
+ "movw $dst, $src",
+ [(set GPR:$dst, imm0_65535:$src)]>,
+ Requires<[HasThumb2]>;
+
+let isTwoAddress = 1 in
+def tMOVTi16 : PseudoInst<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+ "movt $dst, $imm",
+ [(set GPR:$dst, (or (and GPR:$src, 0xffff),
+ imm16high:$imm))]>,
+ Requires<[HasThumb2]>;
+
+def : Pat<(and (or GPR:$src, imm16high:$imm1), imm16high0xffff:$imm2),
+ (tMOVTi16 GPR:$src, (HI16 imm16high:$imm1))>,
+ Requires<[HasThumb2]>;
+
+def : Pat<(i32 imm:$imm),
+ (tMOVTi16 (tMOVi16 (LO16 imm:$imm)),(HI16 imm:$imm))>,
+ Requires<[HasThumb2]>;
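+
+// The pattern above materializes an arbitrary 32-bit constant in two
+// instructions, e.g. 0x12345678 becomes "movw $dst, #0x5678" followed by
+// "movt $dst, #0x1234".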
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+defm t2ADD : T2I_bin_irs <"add", BinOpFrag<(add node:$LHS, node:$RHS)>>;
+defm t2SUB : T2I_bin_irs <"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+def tADDri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm0_4095:$rhs))]>,
+ Requires<[HasThumb2]>;
+def tSUBri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm0_4095_neg:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+defm t2ADDS : T2I_bin_s_irs<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm t2SUBS : T2I_bin_s_irs<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm t2ADC : T2I_bin_c_irs<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm t2SBC : T2I_bin_c_irs<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+
+def tMLS : PseudoInst<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ "mls $dst, $a, $b, $c",
+ [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
+ Requires<[HasThumb2]>;
+
+def tORNrs : PseudoInst<(outs GPR:$dst), (ins GPR:$src1, t2_so_reg:$src2),
+ "orn $dst, $src1, $src2",
+ [(set GPR:$dst, (or GPR:$src1, (not t2_so_reg:$src2)))]>,
+ Requires<[HasThumb2]>;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 684ecb4ce793..59cf125a9b99 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -17,19 +17,22 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -39,6 +42,12 @@ STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMAllocLoadStoreOpt - Post-register-allocation pass to combine
/// load / store instructions to form ldm / stm instructions.
@@ -82,6 +91,8 @@ namespace {
SmallVector<MachineBasicBlock::iterator, 4> &Merges);
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -586,13 +597,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
static int getMemoryOpOffset(const MachineInstr *MI) {
int Opcode = MI->getOpcode();
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
unsigned NumOperands = MI->getDesc().getNumOperands();
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
int Offset = isAM2
- ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+ ? ARM_AM::getAM2Offset(OffField)
+ : (isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4);
if (isAM2) {
if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
Offset = -Offset;
+ } else if (isAM3) {
+ if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
} else {
if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
Offset = -Offset;
@@ -600,6 +617,120 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
return Offset;
}
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int OffImm, bool isDef,
+ DebugLoc dl, unsigned NewOpc,
+ unsigned Reg, bool RegDeadKill,
+ unsigned BaseReg, bool BaseKill,
+ unsigned OffReg, bool OffKill,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo *TII) {
+ unsigned Offset;
+ if (OffImm < 0)
+ Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
+ else
+ Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
+ if (isDef)
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addReg(OffReg, getKillRegState(OffKill))
+ .addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ else
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addReg(OffReg, getKillRegState(OffKill))
+ .addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+}
+
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+ if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ return false;
+
+ bool isLd = Opcode == ARM::LDRD;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ bool BaseKill = BaseOp.isKill();
+ const MachineOperand &OffOp = MI->getOperand(3);
+ unsigned OffReg = OffOp.getReg();
+ bool OffKill = OffOp.isKill();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to
+ // an ldm or stm.
+ unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getKillRegState(EvenDeadKill))
+ .addReg(OddReg, getKillRegState(OddDeadKill));
+ ++NumSTRD2STM;
+ }
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and the base register is killed, it may have been
+ // re-defined by the load; make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg) ||
+ (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg) &&
+ (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
+ BaseReg, false, OffReg, false, Pred, PredReg, TII);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
+ BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
+ } else {
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
+ Pred, PredReg, TII);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
+ Pred, PredReg, TII);
+ }
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
+ }
+
+ MBBI = prior(MBBI);
+ MBB.erase(MI);
+ }
+ return false;
+}
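+
+// For example, "ldrd r1, r2, [r0]" starts at an odd register, which is not a
+// valid register pair; the registers ascend and there is no offset, so it is
+// rewritten above as "ldm r0, {r1, r2}". With a nonzero offset, e.g.
+// "ldrd r1, r2, [r0, #+8]", it is split into two ldr instructions instead.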
+
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
@@ -617,6 +748,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
RS->enterBasicBlock(&MBB);
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
+ if (FixInvalidRegPairOp(MBB, MBBI))
+ continue;
+
bool Advance = false;
bool TryMerge = false;
bool Clobber = false;
@@ -817,8 +951,10 @@ namespace {
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ const TargetData *TD;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
MachineRegisterInfo *MRI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -828,6 +964,11 @@ namespace {
}
private:
+ bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ unsigned &OffReg, unsigned &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred);
bool RescheduleOps(MachineBasicBlock *MBB,
SmallVector<MachineInstr*, 4> &Ops,
unsigned Base, bool isLd,
@@ -838,8 +979,10 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TD = Fn.getTarget().getTargetData();
TII = Fn.getTarget().getInstrInfo();
TRI = Fn.getTarget().getRegisterInfo();
+ STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
MRI = &Fn.getRegInfo();
bool Modified = false;
@@ -850,15 +993,19 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return Modified;
}
-static bool IsSafeToMove(bool isLd, unsigned Base,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator E,
- SmallPtrSet<MachineInstr*, 4> MoveOps,
- const TargetRegisterInfo *TRI) {
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallSet<unsigned, 4> &MemRegs,
+ const TargetRegisterInfo *TRI) {
// Are there stores / loads / calls between them?
// FIXME: This is overly conservative. We should make use of alias information
// some day.
+ SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
+ if (MemOps.count(&*I))
+ continue;
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
return false;
@@ -871,15 +1018,76 @@ static bool IsSafeToMove(bool isLd, unsigned Base,
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (TID.mayStore() && !MoveOps.count(&*I))
+ if (TID.mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
MachineOperand &MO = I->getOperand(j);
- if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef() && TRI->regsOverlap(Reg, Base))
return false;
+ if (Reg != Base && !MemRegs.count(Reg))
+ AddedRegPressure.insert(Reg);
}
}
+
+ // Estimate register pressure increase due to the transformation.
+ if (MemRegs.size() <= 4)
+ // Ok if we are moving a small number of instructions.
+ return true;
+ return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
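+
+// e.g. moving two or three memory ops is always considered profitable, while
+// moving eight would be rejected if it dragged more than sixteen additional
+// live registers across the moved instructions.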
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+ DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ unsigned &OffReg, unsigned &Offset,
+ unsigned &PredReg,
+ ARMCC::CondCodes &Pred) {
+ // FIXME: FLDS / FSTS -> FLDD / FSTD
+ unsigned Opcode = Op0->getOpcode();
+ if (Opcode == ARM::LDR)
+ NewOpc = ARM::LDRD;
+ else if (Opcode == ARM::STR)
+ NewOpc = ARM::STRD;
+ else
+ return false;
+
+ // Make sure the base address satisfies the i64 ld / st alignment
+ // requirement.
+ if (!Op0->hasOneMemOperand() ||
+ !Op0->memoperands_begin()->getValue() ||
+ Op0->memoperands_begin()->isVolatile())
+ return false;
+
+ unsigned Align = Op0->memoperands_begin()->getAlignment();
+ unsigned ReqAlign = STI->hasV6Ops()
+ ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 needs 8-byte align
+ if (Align < ReqAlign)
+ return false;
+
+ // Then make sure the immediate offset fits.
+ int OffImm = getMemoryOpOffset(Op0);
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = -OffImm;
+ }
+ if (OffImm >= 256) // 8 bits
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+
+ EvenReg = Op0->getOperand(0).getReg();
+ OddReg = Op1->getOperand(0).getReg();
+ if (EvenReg == OddReg)
+ return false;
+ BaseReg = Op0->getOperand(1).getReg();
+ OffReg = Op0->getOperand(2).getReg();
+ Pred = getInstrPredicate(Op0, PredReg);
+ dl = Op0->getDebugLoc();
return true;
}
@@ -902,6 +1110,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
MachineInstr *FirstOp = 0;
MachineInstr *LastOp = 0;
int LastOffset = 0;
+ unsigned LastOpcode = 0;
unsigned LastBytes = 0;
unsigned NumMove = 0;
for (int i = Ops.size() - 1; i >= 0; --i) {
@@ -916,6 +1125,10 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
LastOp = Op;
}
+ unsigned Opcode = Op->getOpcode();
+ if (LastOpcode && Opcode != LastOpcode)
+ break;
+
int Offset = getMemoryOpOffset(Op);
unsigned Bytes = getLSMultipleTransferSize(Op);
if (LastBytes) {
@@ -924,34 +1137,80 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
LastOffset = Offset;
LastBytes = Bytes;
- if (++NumMove == 4)
+ LastOpcode = Opcode;
+ if (++NumMove == 8) // FIXME: Tune
break;
}
if (NumMove <= 1)
Ops.pop_back();
else {
- SmallPtrSet<MachineInstr*, 4> MoveOps;
- for (int i = NumMove-1; i >= 0; --i)
- MoveOps.insert(Ops[i]);
+ SmallPtrSet<MachineInstr*, 4> MemOps;
+ SmallSet<unsigned, 4> MemRegs;
+ for (int i = NumMove-1; i >= 0; --i) {
+ MemOps.insert(Ops[i]);
+ MemRegs.insert(Ops[i]->getOperand(0).getReg());
+ }
// Be conservative, if the instructions are too far apart, don't
// move them. We want to limit the increase of register pressure.
- bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
+ bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
if (DoMove)
- DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
+ DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+ MemOps, MemRegs, TRI);
if (!DoMove) {
for (unsigned i = 0; i != NumMove; ++i)
Ops.pop_back();
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
- while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
+ while (InsertPos != MBB->end() && MemOps.count(InsertPos))
++InsertPos;
- for (unsigned i = 0; i != NumMove; ++i) {
- MachineInstr *Op = Ops.back();
+
+ // If we are moving a pair of loads / stores, see if it makes sense
+ // to try to allocate a pair of registers that can form register pairs.
+ MachineInstr *Op0 = Ops.back();
+ MachineInstr *Op1 = Ops[Ops.size()-2];
+ unsigned EvenReg = 0, OddReg = 0;
+ unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ unsigned NewOpc = 0;
+ unsigned Offset = 0;
+ DebugLoc dl;
+ if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+ EvenReg, OddReg, BaseReg, OffReg,
+ Offset, PredReg, Pred)) {
+ Ops.pop_back();
Ops.pop_back();
- MBB->splice(InsertPos, MBB, Op);
+
+ // Form the pair instruction.
+ if (isLd) {
+ BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+ .addReg(EvenReg, RegState::Define)
+ .addReg(OddReg, RegState::Define)
+ .addReg(BaseReg).addReg(0).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ ++NumLDRDFormed;
+ } else {
+ BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+ .addReg(EvenReg)
+ .addReg(OddReg)
+ .addReg(BaseReg).addReg(0).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ ++NumSTRDFormed;
+ }
+ MBB->erase(Op0);
+ MBB->erase(Op1);
+
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
+ MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ } else {
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
}
NumLdStMoved += NumMove;
@@ -1039,7 +1298,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
}
if (StopHere) {
- // Found a duplicate (a base+offset combination that's seen earlier). Backtrack.
+ // Found a duplicate (a base+offset combination that's seen earlier).
+ // Backtrack.
--Loc;
break;
}
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 199858f525c8..bbc13001ae55 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -159,7 +159,7 @@ ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
TII(tii), STI(sti),
- FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
}
static inline
@@ -194,10 +194,6 @@ void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
}
-const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
- return &ARM::GPRRegClass;
-}
-
/// isLowRegister - Returns true if the register is low register r0-r7.
///
bool ARMRegisterInfo::isLowRegister(unsigned Reg) const {
@@ -304,6 +300,191 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
return false;
}
+const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
+ return &ARM::GPRRegClass;
+}
+
+/// getAllocationOrder - Returns the register allocation order for a specified
+/// register class in the form of a pair of TargetRegisterClass iterators.
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ // Alternative register allocation orders when favoring even / odd registers
+ // of register pairs.
+
+ // No FP, R9 is available.
+ static const unsigned GPREven1[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd1[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R7, R9 is available.
+ static const unsigned GPREven2[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd2[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R11, R9 is available.
+ static const unsigned GPREven3[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9
+ };
+ static const unsigned GPROdd3[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+ ARM::R8
+ };
+
+ // No FP, R9 is not available.
+ static const unsigned GPREven4[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd4[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R7, R9 is not available.
+ static const unsigned GPREven5[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd5[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R11, R9 is not available.
+ static const unsigned GPREven6[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+ };
+ static const unsigned GPROdd6[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+ };
+
+
+ if (HintType == ARMRI::RegPairEven) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+
+ if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven1,
+ GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven4,
+ GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven2,
+ GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven5,
+ GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven3,
+ GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven6,
+ GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
+ }
+ } else if (HintType == ARMRI::RegPairOdd) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+
+ if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd1,
+ GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd4,
+ GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd2,
+ GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd5,
+ GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd3,
+ GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd6,
+ GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
+ }
+ }
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register if it is successful.
+unsigned
+ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const {
+ if (Reg == 0 || !isPhysicalRegister(Reg))
+ return 0;
+ if (Type == 0)
+ return Reg;
+ else if (Type == (unsigned)ARMRI::RegPairOdd)
+ // Odd register.
+ return getRegisterPairOdd(Reg, MF);
+ else if (Type == (unsigned)ARMRI::RegPairEven)
+ // Even register.
+ return getRegisterPairEven(Reg, MF);
+ return 0;
+}
+
+void
+ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ // If 'Reg' is one register of an even / odd pair and it has been changed
+ // (e.g. coalesced) into a different register, the allocation hint of the
+ // other register in the pair must be updated to reflect the relationship
+ // change.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ if (Hint.second == Reg)
+ // Make sure the pair has not already divorced.
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ }
+}
+
bool
ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1506,9 +1687,8 @@ unsigned ARMRegisterInfo::getRARegister() const {
unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
if (STI.isTargetDarwin() || hasFP(MF))
- return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11;
- else
- return ARM::SP;
+ return FramePtr;
+ return ARM::SP;
}
unsigned ARMRegisterInfo::getEHExceptionRegister() const {
@@ -1525,4 +1705,152 @@ int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
+unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R1:
+ return ARM::R0;
+ case ARM::R3:
+ // FIXME!
+ return STI.isThumb() ? 0 : ARM::R2;
+ case ARM::R5:
+ return ARM::R4;
+ case ARM::R7:
+ return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
+ case ARM::R9:
+ return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
+ case ARM::R11:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1:
+ return ARM::S0;
+ case ARM::S3:
+ return ARM::S2;
+ case ARM::S5:
+ return ARM::S4;
+ case ARM::S7:
+ return ARM::S6;
+ case ARM::S9:
+ return ARM::S8;
+ case ARM::S11:
+ return ARM::S10;
+ case ARM::S13:
+ return ARM::S12;
+ case ARM::S15:
+ return ARM::S14;
+ case ARM::S17:
+ return ARM::S16;
+ case ARM::S19:
+ return ARM::S18;
+ case ARM::S21:
+ return ARM::S20;
+ case ARM::S23:
+ return ARM::S22;
+ case ARM::S25:
+ return ARM::S24;
+ case ARM::S27:
+ return ARM::S26;
+ case ARM::S29:
+ return ARM::S28;
+ case ARM::S31:
+ return ARM::S30;
+
+ case ARM::D1:
+ return ARM::D0;
+ case ARM::D3:
+ return ARM::D2;
+ case ARM::D5:
+ return ARM::D4;
+ case ARM::D7:
+ return ARM::D6;
+ case ARM::D9:
+ return ARM::D8;
+ case ARM::D11:
+ return ARM::D10;
+ case ARM::D13:
+ return ARM::D12;
+ case ARM::D15:
+ return ARM::D14;
+ }
+
+ return 0;
+}
+
+unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R0:
+ return ARM::R1;
+ case ARM::R2:
+ // FIXME!
+ return STI.isThumb() ? 0 : ARM::R3;
+ case ARM::R4:
+ return ARM::R5;
+ case ARM::R6:
+ return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
+ case ARM::R8:
+ return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
+ case ARM::R10:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0:
+ return ARM::S1;
+ case ARM::S2:
+ return ARM::S3;
+ case ARM::S4:
+ return ARM::S5;
+ case ARM::S6:
+ return ARM::S7;
+ case ARM::S8:
+ return ARM::S9;
+ case ARM::S10:
+ return ARM::S11;
+ case ARM::S12:
+ return ARM::S13;
+ case ARM::S14:
+ return ARM::S15;
+ case ARM::S16:
+ return ARM::S17;
+ case ARM::S18:
+ return ARM::S19;
+ case ARM::S20:
+ return ARM::S21;
+ case ARM::S22:
+ return ARM::S23;
+ case ARM::S24:
+ return ARM::S25;
+ case ARM::S26:
+ return ARM::S27;
+ case ARM::S28:
+ return ARM::S29;
+ case ARM::S30:
+ return ARM::S31;
+
+ case ARM::D0:
+ return ARM::D1;
+ case ARM::D2:
+ return ARM::D3;
+ case ARM::D4:
+ return ARM::D5;
+ case ARM::D6:
+ return ARM::D7;
+ case ARM::D8:
+ return ARM::D9;
+ case ARM::D10:
+ return ARM::D11;
+ case ARM::D12:
+ return ARM::D13;
+ case ARM::D14:
+ return ARM::D15;
+ }
+
+ return 0;
+}
+
#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index e1d9efbcabf7..e8f4fd805d5b 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -22,12 +22,17 @@ namespace llvm {
class TargetInstrInfo;
class Type;
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
+
struct ARMRegisterInfo : public ARMGenRegisterInfo {
const TargetInstrInfo &TII;
const ARMSubtarget &STI;
-private:
- /// FramePtr - ARM physical register used as frame ptr.
- unsigned FramePtr;
public:
ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
@@ -49,10 +54,6 @@ public:
/// if the register is a single precision VFP register.
static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP);
- /// getPointerRegClass - Return the register class to use to hold pointers.
- /// This is used for addressing modes.
- const TargetRegisterClass *getPointerRegClass() const;
-
/// Code Generation virtual methods...
const TargetRegisterClass *
getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
@@ -65,6 +66,19 @@ public:
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+ const TargetRegisterClass *getPointerRegClass() const;
+
+ std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const;
+
+ unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const;
+
+ void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const;
+
bool requiresRegisterScavenging(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
@@ -95,6 +109,15 @@ public:
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
bool isLowRegister(unsigned Reg) const;
+
+private:
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+ unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+ unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index ebe7d582a664..d864079f85e1 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -134,7 +134,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
GPRClass::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isTargetDarwin()) {
if (Subtarget.isR9Reserved())
return ARM_GPR_AO_4;
else
@@ -154,7 +154,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
GPRClass::iterator I;
- if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isTargetDarwin()) {
if (Subtarget.isR9Reserved()) {
I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
} else {
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 000000000000..75fa707f9f93
--- /dev/null
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,35 @@
+//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across ARM processors
+//
+def FU_iALU : FuncUnit; // Integer alu unit
+def FU_iLdSt : FuncUnit; // Integer load / store unit
+def FU_FpALU : FuncUnit; // FP alu unit
+def FU_FpLdSt : FuncUnit; // FP load / store unit
+def FU_Br : FuncUnit; // Branch unit
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALU : InstrItinClass;
+def IIC_iLoad : InstrItinClass;
+def IIC_iStore : InstrItinClass;
+def IIC_fpALU : InstrItinClass;
+def IIC_fpLoad : InstrItinClass;
+def IIC_fpStore : InstrItinClass;
+def IIC_Br : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[]>;
+
+include "ARMScheduleV6.td"
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 000000000000..596a57f8aefd
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,22 @@
+//===- ARMSchedule.td - ARM v6 Scheduling Definitions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+def V6Itineraries : ProcessorItineraries<[
+ InstrItinData<IIC_iALU , [InstrStage<1, [FU_iALU]>]>,
+ InstrItinData<IIC_iLoad , [InstrStage<2, [FU_iLdSt]>]>,
+ InstrItinData<IIC_iStore , [InstrStage<1, [FU_iLdSt]>]>,
+ InstrItinData<IIC_fpALU , [InstrStage<6, [FU_FpALU]>]>,
+ InstrItinData<IIC_fpLoad , [InstrStage<2, [FU_FpLdSt]>]>,
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_FpLdSt]>]>,
+ InstrItinData<IIC_Br , [InstrStage<3, [FU_Br]>]>
+]>;
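
Each InstrItinData entry above binds one itinerary class to a pipeline stage and a cycle count; on V6, for example, an integer load occupies the load/store unit for two cycles while an FP ALU op takes six. A standalone sketch of how such a table can be modeled and queried (the C++ types here are invented for illustration; only the class/unit names and cycle counts mirror the TableGen above):

    #include <cstdio>

    enum FuncUnit { FU_iALU, FU_iLdSt, FU_FpALU, FU_FpLdSt, FU_Br };
    enum ItinClass { IIC_iALU, IIC_iLoad, IIC_iStore, IIC_fpALU,
                     IIC_fpLoad, IIC_fpStore, IIC_Br, NumItinClasses };

    struct InstrStage { unsigned Cycles; FuncUnit Unit; };

    // One stage per itinerary class, mirroring V6Itineraries.
    static const InstrStage V6Itins[NumItinClasses] = {
      {1, FU_iALU},   {2, FU_iLdSt},  {1, FU_iLdSt}, {6, FU_FpALU},
      {2, FU_FpLdSt}, {1, FU_FpLdSt}, {3, FU_Br},
    };

    unsigned getStageLatency(ItinClass IC) { return V6Itins[IC].Cycles; }

    int main() {
      std::printf("V6 fp ALU latency: %u cycles\n", getStageLatency(IIC_fpALU));
      return 0;
    }
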
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index a978380b627f..7ac7b4923d68 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -24,7 +24,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
, ARMFPUType(None)
, IsThumb(isThumb)
, ThumbMode(Thumb1)
- , UseThumbBacktraces(false)
, IsR9Reserved(false)
, stackAlignment(4)
, CPUString("generic")
@@ -83,8 +82,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
if (isAAPCS_ABI())
stackAlignment = 8;
- if (isTargetDarwin()) {
- UseThumbBacktraces = true;
+ if (isTargetDarwin())
IsR9Reserved = true;
- }
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 07040558d878..c3cc7fff6e3f 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -14,6 +14,7 @@
#ifndef ARMSUBTARGET_H
#define ARMSUBTARGET_H
+#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetSubtarget.h"
#include <string>
@@ -48,9 +49,6 @@ protected:
/// ThumbMode - Indicates supported Thumb version.
ThumbTypeEnum ThumbMode;
- /// UseThumbBacktraces - True if we use thumb style backtraces.
- bool UseThumbBacktraces;
-
/// IsR9Reserved - True if R9 is not available as a general purpose register.
bool IsR9Reserved;
@@ -61,6 +59,9 @@ protected:
/// CPUString - String name of used CPU.
std::string CPUString;
+  /// Selected instruction itineraries (one entry per itinerary class).
+ InstrItineraryData InstrItins;
+
public:
enum {
isELF, isDarwin
@@ -106,14 +107,17 @@ protected:
bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
bool isThumb() const { return IsThumb; }
- bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); }
- bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+ bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
+ bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
- bool useThumbBacktraces() const { return UseThumbBacktraces; }
bool isR9Reserved() const { return IsR9Reserved; }
const std::string & getCPUString() const { return CPUString; }
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
/// getStackAlignment - Returns the minimum alignment known to hold for the
/// stack frame on entry to the function, which must be maintained by every
/// function for this subtarget.
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
index 4107dccd2a69..42b8eae5e056 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.cpp
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -17,80 +17,42 @@
#include <cctype>
using namespace llvm;
-
const char *const llvm::arm_asm_table[] = {
- "{r0}", "r0",
- "{r1}", "r1",
- "{r2}", "r2",
- "{r3}", "r3",
- "{r4}", "r4",
- "{r5}", "r5",
- "{r6}", "r6",
- "{r7}", "r7",
- "{r8}", "r8",
- "{r9}", "r9",
- "{r10}", "r10",
- "{r11}", "r11",
- "{r12}", "r12",
- "{r13}", "r13",
- "{r14}", "r14",
- "{lr}", "lr",
- "{sp}", "sp",
- "{ip}", "ip",
- "{fp}", "fp",
- "{sl}", "sl",
- "{memory}", "memory",
- "{cc}", "cc",
- 0,0};
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0
+};
ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
- GlobalPrefix = "_";
- PrivateGlobalPrefix = "L";
- LessPrivateGlobalPrefix = "l";
- StringConstantPrefix = "\1LC";
- BSSSection = 0; // no BSS section
ZeroDirective = "\t.space\t";
ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
SetDirective = "\t.set\t";
- WeakRefDirective = "\t.weak_reference\t";
- WeakDefDirective = "\t.weak_definition ";
- HiddenDirective = "\t.private_extern\t";
ProtectedDirective = NULL;
- JumpTableDataSection = ".const";
- CStringSection = "\t.cstring";
HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
- NeedsIndirectEncoding = true;
- if (TM.getRelocationModel() == Reloc::Static) {
- StaticCtorsSection = ".constructor";
- StaticDtorsSection = ".destructor";
- } else {
- StaticCtorsSection = ".mod_init_func";
- StaticDtorsSection = ".mod_term_func";
- }
-
- // In non-PIC modes, emit a special label before jump tables so that the
- // linker can perform more accurate dead code stripping.
- if (TM.getRelocationModel() != Reloc::PIC_) {
- // Emit a local label that is preserved until the linker runs.
- JumpTableSpecialLabelPrefix = "l";
- }
-
- NeedsSet = true;
- DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
- DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
- DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
- DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
- DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
- DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
- DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
- DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
- DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
- DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
- DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+ SupportsDebugInformation = true;
}
ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
@@ -115,7 +77,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits";
DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits";
DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
if (Subtarget->isAAPCS_ABI()) {
StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array";
@@ -124,6 +86,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
}
+ SupportsDebugInformation = true;
}
/// Count the number of comma-separated arguments.
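
The arm_asm_table reindented above is a flat array of {constraint, replacement} string pairs terminated by two nulls; the asm printer walks it to rewrite inline-asm register constraints such as "{r0}" into plain register names. A minimal lookup over a table of the same shape (illustrative subset only):

    #include <cstdio>
    #include <cstring>

    // Same layout as arm_asm_table: key, value, ..., 0, 0.
    static const char *const AsmTable[] = {
      "{r0}", "r0", "{lr}", "lr", "{cc}", "cc", 0, 0,
    };

    // Return the translation for Key, or null if it is not in the table.
    const char *translate(const char *Key) {
      for (unsigned i = 0; AsmTable[i]; i += 2)
        if (std::strcmp(AsmTable[i], Key) == 0)
          return AsmTable[i + 1];
      return 0;
    }

    int main() {
      std::printf("{lr} -> %s\n", translate("{lr}"));
      return 0;
    }
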
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
index 9e6f8568f76a..683692f00aab 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.h
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -26,8 +26,7 @@ namespace llvm {
template <class BaseTAI>
struct ARMTargetAsmInfo : public BaseTAI {
- explicit ARMTargetAsmInfo(const ARMTargetMachine &TM):
- BaseTAI(TM) {
+ explicit ARMTargetAsmInfo(const ARMTargetMachine &TM) : BaseTAI(TM) {
BaseTAI::AsmTransCBE = arm_asm_table;
BaseTAI::AlignmentIsInBytes = false;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 7033907bbb01..8006b9be32eb 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,9 +23,6 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static cl::opt<bool>
-EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden,
- cl::desc("Enable pre-regalloc load store optimization pass"));
static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
cl::desc("Disable load store optimization pass"));
static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
@@ -42,6 +39,11 @@ int ARMTargetMachineModule = 0;
static RegisterTarget<ARMTargetMachine> X("arm", "ARM");
static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeARMTarget() { }
+}
+
// No assembler printer by default
ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0;
@@ -97,7 +99,8 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
InstrInfo(Subtarget),
FrameInfo(Subtarget),
JITInfo(),
- TLInfo(*this) {
+ TLInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
DefRelocModel = getRelocationModel();
}
@@ -149,8 +152,6 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (!EnablePreLdStOpti)
- return false;
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
PM.add(createARMLoadStoreOptimizationPass(true));
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 7192c1bb6184..c4c8e6c1d985 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -28,13 +28,14 @@ namespace llvm {
class Module;
class ARMTargetMachine : public LLVMTargetMachine {
- ARMSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- ARMInstrInfo InstrInfo;
- ARMFrameInfo FrameInfo;
- ARMJITInfo JITInfo;
- ARMTargetLowering TLInfo;
- Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+ ARMSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMInstrInfo InstrInfo;
+ ARMFrameInfo FrameInfo;
+ ARMJITInfo JITInfo;
+ ARMTargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
protected:
// To avoid having the target depend on the asmprinter libraries, the asmprinter
@@ -59,6 +60,9 @@ public:
virtual ARMTargetLowering *getTargetLowering() const {
return const_cast<ARMTargetLowering*>(&TLInfo);
}
+ virtual const InstrItineraryData getInstrItineraryData() const {
+ return InstrItins;
+ }
static void registerAsmPrinter(AsmPrinterCtorFn F) {
AsmPrinterCtor = F;
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index d908cf436313..948a10070d47 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -45,7 +45,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
DwarfWriter *DW;
- MachineModuleInfo *MMI;
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when printing asm code for different targets.
@@ -84,7 +83,7 @@ namespace {
explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM,
const TargetAsmInfo *T, CodeGenOpt::Level OL,
bool V)
- : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL),
+ : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL),
InCPMode(false) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
}
@@ -97,6 +96,7 @@ namespace {
const char *Modifier = 0);
void printSOImmOperand(const MachineInstr *MI, int opNum);
void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
+ void printSOOperand(const MachineInstr *MI, int OpNum);
void printSORegOperand(const MachineInstr *MI, int opNum);
void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
@@ -396,6 +396,28 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI);
}
+// The constant-shift form of so_reg is a 3-operand unit corresponding to the
+// register forms of the A5.1 "Addressing Mode 1 - Data-processing operands".
+// This includes:
+//   REG 0           - e.g. R5
+//   REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMAsmPrinter::printSOOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+
+ unsigned Reg = MO1.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ O << TM.getRegisterInfo()->getAsmName(Reg);
+
+  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+
+  // Print the shift opc.
+  O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
+    << " ";
+  O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
// so_reg is a 4-operand unit corresponding to the register forms of the A5.1
// "Addressing Mode 1 - Data-processing operands". This includes:
// REG 0 0 - e.g. R5
@@ -805,17 +827,11 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
bool ARMAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
-
- // Emit initial debug information.
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- assert(MMI);
DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "Dwarf Writer is not available");
- DW->BeginModule(&M, MMI, O, this, TAI);
- // Darwin wants symbols to be quoted if they have complex names.
- if (Subtarget->isTargetDarwin())
- Mang->setUseQuotes(true);
+ // Thumb-2 instructions are supported only in unified assembler syntax mode.
+ if (Subtarget->hasThumb2())
+ O << "\t.syntax unified\n";
// Emit ARM Build Attributes
if (Subtarget->isTargetELF()) {
@@ -1115,3 +1131,9 @@ namespace {
}
} Registrator;
}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeARMAsmPrinter() { }
+}
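
printSOOperand above unpacks a shift opcode and a shift amount from one immediate operand and prints syntax like "r5, lsl #3". A toy round-trip of such a packing (the low-3-bits-for-opcode layout is an assumption for illustration, not the actual ARM_AM encoding):

    #include <cstdio>

    enum ShiftOpc { no_shift, asr, lsl, lsr, ror, rrx };
    static const char *const ShiftNames[] = {"", "asr", "lsl", "lsr", "ror", "rrx"};

    // Assumed packing: low 3 bits hold the opcode, remaining bits the amount.
    unsigned encodeSO(ShiftOpc Op, unsigned Amt) { return (Amt << 3) | Op; }
    ShiftOpc getShOp(unsigned Imm)   { return ShiftOpc(Imm & 7); }
    unsigned getOffset(unsigned Imm) { return Imm >> 3; }

    void printSO(const char *Reg, unsigned Imm) {
      std::printf("%s, %s #%u\n", Reg, ShiftNames[getShOp(Imm)], getOffset(Imm));
    }

    int main() {
      printSO("r5", encodeSO(lsl, 3));  // prints: r5, lsl #3
      return 0;
    }
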
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 7ed8ef60302d..1be171353573 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -39,7 +39,7 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) {
// Set up the TargetLowering object.
- //I am having problems with shr n ubyte 1
+ //I am having problems with shr n i8 1
setShiftAmountType(MVT::i64);
setBooleanContents(ZeroOrOneBooleanContent);
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 4c830541f16a..cdd4fa4b2445 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -21,16 +21,17 @@
using namespace llvm;
-/// AlphaTargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library. In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this. Though it is unused, do not remove it.
-extern "C" int AlphaTargetMachineModule;
-int AlphaTargetMachineModule = 0;
-
// Register the targets
static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
+// No assembler printer by default
+AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeAlphaTarget() { }
+}
+
const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
return new AlphaTargetAsmInfo(*this);
}
@@ -92,23 +93,32 @@ bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
bool Verbose,
raw_ostream &Out) {
PM.add(createAlphaLLRPPass(*this));
- PM.add(createAlphaCodePrinterPass(Out, *this, OptLevel, Verbose));
+ // Output assembly language.
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool DumpAsm, MachineCodeEmitter &MCE) {
PM.add(createAlphaCodeEmitterPass(*this, MCE));
- if (DumpAsm)
- PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
return false;
}
bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool DumpAsm, JITCodeEmitter &JCE) {
PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
- if (DumpAsm)
- PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
return false;
}
bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 51224e80de70..946ca559ca04 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -33,10 +33,18 @@ class AlphaTargetMachine : public LLVMTargetMachine {
AlphaJITInfo JITInfo;
AlphaSubtarget Subtarget;
AlphaTargetLowering TLInfo;
-
+
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
+
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter registers its constructor pointer here at startup time if it
+  // is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
AlphaTargetMachine(const Module &M, const std::string &FS);
@@ -46,7 +54,7 @@ public:
virtual const AlphaRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual AlphaTargetLowering* getTargetLowering() const {
+ virtual AlphaTargetLowering* getTargetLowering() const {
return const_cast<AlphaTargetLowering*>(&TLInfo);
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -56,7 +64,7 @@ public:
static unsigned getJITMatchQuality();
static unsigned getModuleMatchQuality(const Module &M);
-
+
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
@@ -75,6 +83,10 @@ public:
CodeGenOpt::Level OptLevel,
bool DumpAsm,
JITCodeEmitter &JCE);
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
};
} // end namespace llvm
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 74b48ee66235..7b73bb302c38 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -303,3 +303,17 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
O << ")";
return false;
}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeAlphaAsmPrinter() { }
+}
+
+namespace {
+ static struct Register {
+ Register() {
+ AlphaTargetMachine::registerAsmPrinter(createAlphaCodePrinterPass);
+ }
+ } Registrator;
+}
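
The Registrator idiom above (repeated for each target in this patch) hangs the printer registration off a file-scope object whose constructor runs during static initialization, so merely linking the asmprinter library installs the factory pointer before main. The pattern, distilled (names invented):

    #include <cassert>
    #include <cstdio>

    struct Printer {};

    struct TargetMachine {
      typedef Printer *(*PrinterCtorFn)();
      static PrinterCtorFn PrinterCtor;  // null until a printer is linked in
      static void registerPrinter(PrinterCtorFn F) { PrinterCtor = F; }
    };
    TargetMachine::PrinterCtorFn TargetMachine::PrinterCtor = 0;

    static Printer *createPrinter() { return new Printer(); }

    namespace {
      // The constructor runs at static-initialization time, before main.
      struct Register {
        Register() { TargetMachine::registerPrinter(createPrinter); }
      } Registrator;
    }

    int main() {
      assert(TargetMachine::PrinterCtor && "printer was not linked in");
      delete TargetMachine::PrinterCtor();
      std::printf("printer registered at static-init time\n");
      return 0;
    }
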
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 5814d2750edc..c3554f6b5734 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -59,6 +59,11 @@ int CBackendTargetMachineModule = 0;
// Register the target.
static RegisterTarget<CTargetMachine> X("c", "C backend");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeCBackendTarget() { }
+}
+
namespace {
/// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
/// any unnamed structure types that are used by the program, and merges
@@ -1449,6 +1454,17 @@ std::string CWriter::GetValueName(const Value *Operand) {
/// writeInstComputationInline - Emit the computation for the specified
/// instruction inline, with no destination provided.
void CWriter::writeInstComputationInline(Instruction &I) {
+ // We can't currently support integer types other than 1, 8, 16, 32, 64.
+ // Validate this.
+ const Type *Ty = I.getType();
+ if (Ty->isInteger() && (Ty!=Type::Int1Ty && Ty!=Type::Int8Ty &&
+ Ty!=Type::Int16Ty && Ty!=Type::Int32Ty && Ty!=Type::Int64Ty)) {
+ cerr << "The C backend does not currently support integer "
+ << "types of widths other than 1, 8, 16, 32, 64.\n";
+ cerr << "This is being tracked as PR 4158.\n";
+ abort();
+ }
+
// If this is a non-trivial bool computation, make sure to truncate down to
// a 1 bit value. This is important because we want "add i1 x, y" to return
// "0" when x and y are true, not "2" for example.
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index da1bf074de96..26a8ece21cf7 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -287,12 +287,11 @@ namespace {
/// LinuxAsmPrinter - SPU assembly printer, customized for Linux
class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter {
DwarfWriter *DW;
- MachineModuleInfo *MMI;
public:
explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM,
const TargetAsmInfo *T, CodeGenOpt::Level F,
bool V)
- : SPUAsmPrinter(O, TM, T, F, V), DW(0), MMI(0) {}
+ : SPUAsmPrinter(O, TM, T, F, V), DW(0) {}
virtual const char *getPassName() const {
return "STI CBEA SPU Assembly Printer";
@@ -490,12 +489,8 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
bool LinuxAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
- SwitchToTextSection("\t.text");
- // Emit initial debug information.
DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "Dwarf Writer is not available");
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- DW->BeginModule(&M, MMI, O, this, TAI);
+ SwitchToTextSection("\t.text");
return Result;
}
@@ -621,3 +616,17 @@ FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
bool verbose) {
return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeCellSPUAsmPrinter() { }
+}
+
+namespace {
+ static struct Register {
+ Register() {
+ SPUTargetMachine::registerAsmPrinter(createSPUAsmPrinterPass);
+ }
+ } Registrator;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 864a914bba78..b28644378bda 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -249,6 +249,25 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::i32, Legal);
setOperationAction(ISD::MUL, MVT::i64, Legal);
+ // Expand double-width multiplication
+ // FIXME: It would probably be reasonable to support some of these operations
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i8, Custom);
setOperationAction(ISD::ADD, MVT::i64, Legal);
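
Marking UMUL_LOHI/SMUL_LOHI/MULHU/MULHS as Expand tells legalization to synthesize the double-width multiplies from ordinary ones rather than expect native support. For reference, MULHU is just the upper half of the full unsigned product; for i32 (a generic sketch, independent of the SPU lowering itself):

    #include <cassert>
    #include <stdint.h>

    // UMUL_LOHI for i32: both halves of the 64-bit product of a and b.
    void umul_lohi32(uint32_t a, uint32_t b, uint32_t &lo, uint32_t &hi) {
      uint64_t p = (uint64_t)a * b;
      lo = (uint32_t)p;          // the ISD::MUL result
      hi = (uint32_t)(p >> 32);  // the ISD::MULHU result
    }

    int main() {
      uint32_t lo, hi;
      umul_lohi32(0xFFFFFFFFu, 2, lo, hi);
      assert(lo == 0xFFFFFFFEu && hi == 1);
      return 0;
    }
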
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
index ff88ed810716..2868ff7592c2 100644
--- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
@@ -41,7 +41,6 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
SupportsDebugInformation = true;
NeedsSet = true;
- SupportsMacInfoSection = false;
DwarfAbbrevSection = "\t.section .debug_abbrev,\"\",@progbits";
DwarfInfoSection = "\t.section .debug_info,\"\",@progbits";
DwarfLineSection = "\t.section .debug_line,\"\",@progbits";
@@ -52,7 +51,7 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
DwarfLocSection = "\t.section .debug_loc,\"\",@progbits";
DwarfARangesSection = "\t.section .debug_aranges,\"\",@progbits";
DwarfRangesSection = "\t.section .debug_ranges,\"\",@progbits";
- DwarfMacInfoSection = "\t.section .debug_macinfo,\"\",progbits";
+ DwarfMacroInfoSection = 0; // macro info not supported.
// Exception handling is not supported on CellSPU (think about it: you only
// have 256K for code+data. Would you support exception handling?)
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 7fa902243c7b..c675ebb09a61 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -23,20 +23,20 @@
using namespace llvm;
-/// CellSPUTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int CellSPUTargetMachineModule;
-int CellSPUTargetMachineModule = 0;
-
namespace {
// Register the targets
RegisterTarget<SPUTargetMachine>
CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]");
}
+// No assembler printer by default
+SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeCellSPUTarget() { }
+}
+
const std::pair<unsigned, int> *
SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
NumEntries = 1;
@@ -93,6 +93,9 @@ bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose,
raw_ostream &Out) {
- PM.add(createSPUAsmPrinterPass(Out, *this, OptLevel, Verbose));
+ // Output assembly language.
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index cd3920333851..d8fe3000d980 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -35,10 +35,18 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUFrameInfo FrameInfo;
SPUTargetLowering TLInfo;
InstrItineraryData InstrItins;
-
+
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
+
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter registers its constructor pointer here at startup time if it
+  // is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ SPUTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
SPUTargetMachine(const Module &M, const std::string &FS);
@@ -78,7 +86,7 @@ public:
return &DataLayout;
}
- virtual const InstrItineraryData getInstrItineraryData() const {
+ virtual const InstrItineraryData getInstrItineraryData() const {
return InstrItins;
}
@@ -88,6 +96,10 @@ public:
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose, raw_ostream &Out);
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
};
} // end namespace llvm
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 04a6829d9cf6..1feea96e3d9d 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -82,6 +82,11 @@ int CppBackendTargetMachineModule = 0;
// Register the target.
static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeCppBackendTarget() { }
+}
+
namespace {
typedef std::vector<const Type*> TypeList;
typedef std::map<const Type*,std::string> TypeMap;
diff --git a/lib/Target/DarwinTargetAsmInfo.cpp b/lib/Target/DarwinTargetAsmInfo.cpp
index 05d235177642..d7d675abf255 100644
--- a/lib/Target/DarwinTargetAsmInfo.cpp
+++ b/lib/Target/DarwinTargetAsmInfo.cpp
@@ -50,6 +50,53 @@ DarwinTargetAsmInfo::DarwinTargetAsmInfo(const TargetMachine &TM)
ConstDataSection = getUnnamedSection(".const_data", SectionFlags::None);
DataCoalSection = getNamedSection("\t__DATA,__datacoal_nt,coalesced",
SectionFlags::Writeable);
+
+
+ // Common settings for all Darwin targets.
+ // Syntax:
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata
+ StringConstantPrefix = "\1LC";
+ NeedsSet = true;
+ NeedsIndirectEncoding = true;
+ AllowQuotesInName = true;
+ HasSingleParameterDotFile = false;
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping. We do not check the
+ // relocation model here since it can be overridden later.
+ JumpTableSpecialLabelPrefix = "l";
+
+ // Directives:
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ HiddenDirective = "\t.private_extern ";
+
+ // Sections:
+ CStringSection = "\t.cstring";
+ JumpTableDataSection = "\t.const\n";
+ BSSSection = 0;
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacroInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
}
/// emitUsedDirectiveFor - On Darwin, internally linked data beginning with
diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
index fc54e23a44d7..662c667037fc 100644
--- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
+++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
@@ -374,3 +374,18 @@ FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o,
bool verbose) {
return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
}
+
+namespace {
+ static struct Register {
+ Register() {
+ IA64TargetMachine::registerAsmPrinter(createIA64CodePrinterPass);
+ }
+ } Registrator;
+}
+
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeIA64AsmPrinter() { }
+}
diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp
index 34a0686564c0..c545b9c0eb1f 100644
--- a/lib/Target/IA64/IA64ISelLowering.cpp
+++ b/lib/Target/IA64/IA64ISelLowering.cpp
@@ -107,6 +107,10 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // FIXME: These should be legal
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
// Use the default implementation.
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
index 878a00a44518..0b93ee5c4ae8 100644
--- a/lib/Target/IA64/IA64TargetMachine.cpp
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -19,16 +19,18 @@
#include "llvm/Target/TargetMachineRegistry.h"
using namespace llvm;
-/// IA64TargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library. In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this. Though it is unused, do not remove it.
-extern "C" int IA64TargetMachineModule;
-int IA64TargetMachineModule = 0;
-
-static RegisterTarget<IA64TargetMachine> X("ia64",
+// Register the target
+static RegisterTarget<IA64TargetMachine> X("ia64",
"IA-64 (Itanium) [experimental]");
+// No assembler printer by default
+IA64TargetMachine::AsmPrinterCtorFn IA64TargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeIA64Target() { }
+}
+
const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const {
return new IA64TargetAsmInfo(*this);
}
@@ -88,7 +90,10 @@ bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose,
raw_ostream &Out) {
- PM.add(createIA64CodePrinterPass(Out, *this, OptLevel, Verbose));
+ // Output assembly language.
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h
index 29d625ce673a..a64da9fca2b8 100644
--- a/lib/Target/IA64/IA64TargetMachine.h
+++ b/lib/Target/IA64/IA64TargetMachine.h
@@ -30,24 +30,32 @@ class IA64TargetMachine : public LLVMTargetMachine {
TargetFrameInfo FrameInfo;
//IA64JITInfo JITInfo;
IA64TargetLowering TLInfo;
-
+
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter registers its constructor pointer here at startup time if it
+  // is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ IA64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
IA64TargetMachine(const Module &M, const std::string &FS);
virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual const IA64Subtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual IA64TargetLowering *getTargetLowering() const {
+ virtual IA64TargetLowering *getTargetLowering() const {
return const_cast<IA64TargetLowering*>(&TLInfo);
}
virtual const IA64RegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
-
+
static unsigned getModuleMatchQuality(const Module &M);
// Pass Pipeline Configuration
@@ -56,6 +64,10 @@ public:
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose, raw_ostream &Out);
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
};
} // End llvm namespace
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 37e5b1eccde4..0aff14fee26c 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -55,6 +55,11 @@ int MSILTargetMachineModule = 0;
static RegisterTarget<MSILTarget> X("msil", "MSIL backend");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeMSILTarget() { }
+}
+
bool MSILModule::runOnModule(Module &M) {
ModulePtr = &M;
TD = &getAnalysis<TargetData>();
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 78869463f3a2..0f5244d666fc 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -35,6 +35,11 @@ int MSP430TargetMachineModule = 0;
static RegisterTarget<MSP430TargetMachine>
X("msp430", "MSP430 [experimental]");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeMSP430Target() { }
+}
+
MSP430TargetMachine::MSP430TargetMachine(const Module &M,
const std::string &FS) :
Subtarget(*this, M, FS),
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index dfb62382e75d..077ec96d17b7 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -578,3 +578,17 @@ doFinalization(Module &M)
return AsmPrinter::doFinalization(M);
}
+
+namespace {
+ static struct Register {
+ Register() {
+ MipsTargetMachine::registerAsmPrinter(createMipsCodePrinterPass);
+ }
+ } Registrator;
+}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeMipsAsmPrinter() { }
+}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 4517cfc96a43..42afcebabec7 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -95,6 +95,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
@@ -122,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
// We don't have line number support yet.
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index ef524e3ecd72..83b9b62e8ee8 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -31,6 +31,14 @@ int MipsTargetMachineModule = 0;
static RegisterTarget<MipsTargetMachine> X("mips", "Mips");
static RegisterTarget<MipselTargetMachine> Y("mipsel", "Mipsel");
+MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0;
+
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeMipsTarget() { }
+}
+
const TargetAsmInfo *MipsTargetMachine::
createTargetAsmInfo() const
{
@@ -125,9 +133,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
// true if AssemblyEmitter is supported
bool MipsTargetMachine::
addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out)
-{
+ bool Verbose, raw_ostream &Out) {
// Output assembly language.
- PM.add(createMipsCodePrinterPass(Out, *this, OptLevel, Verbose));
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  if (AsmPrinterCtor)
+    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index a9e1df27ae7f..85fafadde7c4 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -33,10 +33,23 @@ namespace llvm {
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
+ protected:
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter registers its constructor pointer here at startup time if it
+  // is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ MipsTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle);
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
+
virtual const MipsInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp
index f9a880126500..ca1089b6df08 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp
@@ -48,27 +48,10 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const Function *F = MF.getFunction();
CurrentFnName = Mang->getValueName(F);
- // Iterate over the first basic block instructions to find if it has a
- // DebugLoc. If so emit .file directive. Instructions such as movlw do not
- // have valid DebugLoc, so need to iterate over instructions.
- MachineFunction::const_iterator I = MF.begin();
- for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end();
- MBBI != E; MBBI++) {
- const DebugLoc DLoc = MBBI->getDebugLoc();
- if (!DLoc.isUnknown()) {
- GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit;
- unsigned line = MF.getDebugLocTuple(DLoc).Line;
- DbgInfo.EmitFileDirective(CU);
- DbgInfo.SetFunctBeginLine(line);
- break;
- }
- }
-
// Emit the function frame (args and temps).
EmitFunctionFrame(MF);
- // Emit function begin debug directive.
- DbgInfo.EmitFunctBeginDI(F);
+ DbgInfo.BeginFunction(MF);
// Emit the autos section of function.
EmitAutos(CurrentFnName);
@@ -89,9 +72,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit function start label.
O << CurrentFnName << ":\n";
- // For emitting line directives, we need to keep track of the current
- // source line. When it changes then only emit the line directive.
- unsigned CurLine = 0;
+ DebugLoc CurDL;
O << "\n";
// Print out code for the function.
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
@@ -109,12 +90,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the line directive if source line changed.
const DebugLoc DL = II->getDebugLoc();
- if (!DL.isUnknown()) {
- unsigned line = MF.getDebugLocTuple(DL).Line;
- if (line != CurLine) {
- O << "\t.line " << line << "\n";
- CurLine = line;
- }
+ if (!DL.isUnknown() && DL != CurDL) {
+ DbgInfo.ChangeDebugLoc(MF, DL);
+ CurDL = DL;
}
// Print the assembly for the instruction.
@@ -123,7 +101,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
// Emit function end debug directives.
- DbgInfo.EmitFunctEndDI(F, CurLine);
+ DbgInfo.EndFunction(MF);
return false; // we didn't modify anything.
}
@@ -226,7 +204,7 @@ bool PIC16AsmPrinter::doInitialization (Module &M) {
I->setSection(TAI->SectionForGlobal(I)->getName());
}
- DbgInfo.Init(M);
+ DbgInfo.BeginModule(M);
EmitFunctionDecls(M);
EmitUndefinedVars(M);
EmitDefinedVars(M);
@@ -313,8 +291,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M)
bool PIC16AsmPrinter::doFinalization(Module &M) {
printLibcallDecls();
EmitRemainingAutos();
- DbgInfo.EmitVarDebugInfo(M);
- DbgInfo.EmitEOF();
+ DbgInfo.EndModule(M);
O << "\n\t" << "END\n";
bool Result = AsmPrinter::doFinalization(M);
return Result;
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h
index 8bdcf7258efa..3ec565965496 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/PIC16AsmPrinter.h
@@ -32,7 +32,7 @@ namespace llvm {
explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM,
const TargetAsmInfo *T, CodeGenOpt::Level OL,
bool V)
- : AsmPrinter(O, TM, T, OL, V), DbgInfo(O,T) {
+ : AsmPrinter(O, TM, T, OL, V), DbgInfo(O, T) {
PTLI = TM.getTargetLowering();
PTAI = static_cast<const PIC16TargetAsmInfo *> (T);
}
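
The PIC16DebugInfo rewrite below keeps the existing COFF-style type word: each level of derivation shifts TypeNo left by a few bits and ORs in a tag, so a pointer-to-int ends up as DT_PTR applied over T_INT. A toy version of that packing (the field widths and tag values are assumptions for illustration; the real constants live in PIC16Dbg):

    #include <cstdio>

    // Assumed widths and tags, standing in for the PIC16Dbg constants.
    enum { S_BASIC = 5, S_DERIVED = 3 };
    enum { T_INT = 4, DT_PTR = 1 };

    int main() {
      unsigned short TypeNo = 0;
      TypeNo = (TypeNo << S_DERIVED) | DT_PTR;  // outermost derivation first
      TypeNo = (TypeNo << S_BASIC) | T_INT;     // then the base type
      std::printf("encoded type word: 0x%x\n", TypeNo);  // 0x24
      return 0;
    }
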
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index d7ebea7fa9db..27551cd13d84 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -14,91 +14,23 @@
#include "PIC16.h"
#include "PIC16DebugInfo.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TypeName) {
- if (Ty.isBasicType(Ty.getTag())) {
- std::string Name = "";
- Ty.getName(Name);
- unsigned short BaseTy = GetTypeDebugNumber(Name);
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- TypeNo = TypeNo | (0xffff & BaseTy);
- }
- else if (Ty.isDerivedType(Ty.getTag())) {
- switch(Ty.getTag())
- {
- case dwarf::DW_TAG_pointer_type:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- TypeNo = TypeNo | PIC16Dbg::DT_PTR;
- break;
- default:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- }
- DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
- PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
- }
- else if (Ty.isCompositeType(Ty.getTag())) {
- switch (Ty.getTag()) {
- case dwarf::DW_TAG_array_type: {
- DICompositeType CTy = DICompositeType(Ty.getGV());
- DIArray Elements = CTy.getTypeArray();
- unsigned short size = 1;
- unsigned short Dimension[4]={0,0,0,0};
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- TypeNo = TypeNo | PIC16Dbg::DT_ARY;
- DISubrange SubRange = DISubrange(Element.getGV());
- Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
- // Each dimension is represented by 2 bytes starting at byte 9.
- Aux[8+i*2+0] = Dimension[i];
- Aux[8+i*2+1] = Dimension[i] >> 8;
- size = size * Dimension[i];
- }
- }
- HasAux = true;
- // In auxillary entry for array, 7th and 8th byte represent array size.
- Aux[6] = size & 0xff;
- Aux[7] = size >> 8;
- DIType BaseType = CTy.getTypeDerivedFrom();
- PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
-
- break;
- }
- case dwarf:: DW_TAG_union_type:
- case dwarf::DW_TAG_structure_type: {
- DICompositeType CTy = DICompositeType(Ty.getGV());
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- if (Ty.getTag() == dwarf::DW_TAG_structure_type)
- TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
- else
- TypeNo = TypeNo | PIC16Dbg::T_UNION;
- CTy.getName(TypeName);
- // UniqueSuffix is .number where number is obtained from
- // llvm.dbg.composite<number>.
- std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
- TypeName += UniqueSuffix;
- unsigned short size = CTy.getSizeInBits()/8;
- // 7th and 8th byte represent size.
- HasAux = true;
- Aux[6] = size & 0xff;
- Aux[7] = size >> 8;
- break;
- }
- case dwarf::DW_TAG_enumeration_type: {
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- TypeNo = TypeNo | PIC16Dbg::T_ENUM;
- break;
- }
- default:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- }
- }
+/// PopulateDebugInfo - Populate the TypeNo, Aux[] and TagName from Ty.
+///
+void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+ if (Ty.isBasicType(Ty.getTag()))
+ PopulateBasicTypeInfo (Ty, TypeNo);
+ else if (Ty.isDerivedType(Ty.getTag()))
+ PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+ else if (Ty.isCompositeType(Ty.getTag()))
+ PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
else {
TypeNo = PIC16Dbg::T_NULL;
HasAux = false;
@@ -106,7 +38,127 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
return;
}
+/// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty.
+///
+void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) {
+ std::string Name = "";
+ Ty.getName(Name);
+ unsigned short BaseTy = GetTypeDebugNumber(Name);
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ TypeNo = TypeNo | (0xffff & BaseTy);
+}
+
+/// PopulateDerivedTypeInfo - Populate TypeNo, Aux[], TagName for derived type
+/// from Ty. Derived types are mostly pointers.
+///
+void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+
+ switch(Ty.getTag())
+ {
+ case dwarf::DW_TAG_pointer_type:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ TypeNo = TypeNo | PIC16Dbg::DT_PTR;
+ break;
+ default:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ }
+
+  // We also need to encode the information about the base type of the
+  // pointer in TypeNo.
+ DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
+ PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
+}
+
+/// PopulateArrayTypeInfo - Populate TypeNo, Aux[] for array from Ty.
+void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+ DICompositeType CTy = DICompositeType(Ty.getGV());
+ DIArray Elements = CTy.getTypeArray();
+ unsigned short size = 1;
+ unsigned short Dimension[4]={0,0,0,0};
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ TypeNo = TypeNo | PIC16Dbg::DT_ARY;
+ DISubrange SubRange = DISubrange(Element.getGV());
+ Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
+ // Each dimension is represented by 2 bytes starting at byte 9.
+ Aux[8+i*2+0] = Dimension[i];
+ Aux[8+i*2+1] = Dimension[i] >> 8;
+ size = size * Dimension[i];
+ }
+ }
+ HasAux = true;
+  // In the auxiliary entry for an array, the 7th and 8th bytes hold the array size.
+ Aux[6] = size & 0xff;
+ Aux[7] = size >> 8;
+ DIType BaseType = CTy.getTypeDerivedFrom();
+ PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
+}
+
+/// PopulateStructOrUnionTypeInfo - Populate TypeNo, Aux[], and TagName for a
+/// structure or union.
+///
+void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
+ unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+ DICompositeType CTy = DICompositeType(Ty.getGV());
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ if (Ty.getTag() == dwarf::DW_TAG_structure_type)
+ TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
+ else
+ TypeNo = TypeNo | PIC16Dbg::T_UNION;
+ CTy.getName(TagName);
+ // UniqueSuffix is .number where number is obtained from
+ // llvm.dbg.composite<number>.
+ std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
+ TagName += UniqueSuffix;
+ unsigned short size = CTy.getSizeInBits()/8;
+ // 7th and 8th byte represent size.
+ HasAux = true;
+ Aux[6] = size & 0xff;
+ Aux[7] = size >> 8;
+}
+
+/// PopulateEnumTypeInfo - Populate TypeNo for enum from Ty.
+void PIC16DbgInfo::PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo) {
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ TypeNo = TypeNo | PIC16Dbg::T_ENUM;
+}
+
+/// PopulateCompositeTypeInfo - Populate TypeNo, Aux[] and TagName for
+/// composite types from Ty.
+///
+void PIC16DbgInfo::PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+ switch (Ty.getTag()) {
+ case dwarf::DW_TAG_array_type: {
+ PopulateArrayTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+ break;
+ }
+ case dwarf:: DW_TAG_union_type:
+ case dwarf::DW_TAG_structure_type: {
+ PopulateStructOrUnionTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+ break;
+ }
+ case dwarf::DW_TAG_enumeration_type: {
+ PopulateEnumTypeInfo (Ty, TypeNo);
+ break;
+ }
+ default:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ }
+}
+
+/// GetTypeDebugNumber - Get the debug type number for the given type.
+///
unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) {
if (type == "char")
return PIC16Dbg::T_CHAR;
@@ -127,8 +179,10 @@ unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) {
else
return 0;
}
-
-short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
+
+/// getStorageClass - Get the storage class for the given debug variable.
+///
+short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) {
short ClassNo;
if (PAN::isLocalName(DIGV.getGlobal()->getName())) {
// Generating C_AUTO here fails due to error in linker. Change it once
@@ -142,12 +196,126 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
return ClassNo;
}
-void PIC16DbgInfo::Init(Module &M) {
- // Do all debug related initializations here.
- EmitFileDirective(M);
+/// BeginModule - Emit necessary debug info to start a Module and do other
+/// required initializations.
+void PIC16DbgInfo::BeginModule(Module &M) {
+ // Emit file directive for module.
+ GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
+ if (CU) {
+ EmitDebugDirectives = true;
+ SwitchToCU(CU);
+ }
+
+ // Emit debug info for decls of composite types.
EmitCompositeTypeDecls(M);
}
+/// Helper to find the first valid DebugLoc in a function.
+///
+static const DebugLoc GetDebugLocForFunction(const MachineFunction &MF) {
+ DebugLoc DL;
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ DL = II->getDebugLoc();
+ if (!DL.isUnknown())
+ return DL;
+ }
+ }
+ return DL;
+}
+
+/// BeginFunction - Emit necessary debug info to start a function.
+///
+void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) {
+ if (! EmitDebugDirectives) return;
+
+  // Retrieve the first valid DebugLoc and process it.
+ const DebugLoc &DL = GetDebugLocForFunction(MF);
+ ChangeDebugLoc(MF, DL, true);
+
+ EmitFunctBeginDI(MF.getFunction());
+
+  // Set the current line to 0 so that the .line directive is generated after .bf.
+ CurLine = 0;
+}
+
+/// ChangeDebugLoc - Take necessary steps when DebugLoc changes.
+/// CurFile and CurLine may change as a result of this.
+///
+void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF,
+ const DebugLoc &DL, bool IsInBeginFunction) {
+ if (! EmitDebugDirectives) return;
+ assert (! DL.isUnknown() && "can't change to invalid debug loc");
+
+ GlobalVariable *CU = MF.getDebugLocTuple(DL).CompileUnit;
+ unsigned line = MF.getDebugLocTuple(DL).Line;
+
+ SwitchToCU(CU);
+ SwitchToLine(line, IsInBeginFunction);
+}
+
+/// SwitchToLine - Emit line directive for a new line.
+///
+void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) {
+ if (CurLine == Line) return;
+ if (!IsInBeginFunction) O << "\n\t.line " << Line << "\n";
+ CurLine = Line;
+}
+
+/// EndFunction - Emit .ef for end of function.
+///
+void PIC16DbgInfo::EndFunction(const MachineFunction &MF) {
+ if (! EmitDebugDirectives) return;
+ EmitFunctEndDI(MF.getFunction(), CurLine);
+}
+
+/// EndModule - Emit .eof for end of module.
+///
+void PIC16DbgInfo::EndModule(Module &M) {
+ if (! EmitDebugDirectives) return;
+ EmitVarDebugInfo(M);
+ if (CurFile != "") O << "\n\t.eof";
+}
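Taken together, the hooks above fix a per-module call order. A comment-form
schematic of who emits what (editorial; the directive names are taken from the
code above, the ordering from the AsmPrinter that drives these hooks):

  // BeginModule(M)         -> .file "<dir>/<file>" via SwitchToCU
  // BeginFunction(MF)      -> first known DebugLoc, .bf/.bb defs, CurLine = 0
  // ChangeDebugLoc(MF, DL) -> SwitchToCU, then .line N whenever N changes
  // EndFunction(MF)        -> block/function-end defs stamped with CurLine
  // EndModule(M)           -> per-variable debug info, then .eof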
+
+/// EmitCompositeTypeElements - Emit debug information for members of a
+/// composite type.
+///
+void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
+ std::string UniqueSuffix) {
+ unsigned long Value = 0;
+ DIArray Elements = CTy.getTypeArray();
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
+ DIDescriptor Element = Elements.getElement(i);
+ unsigned short TypeNo = 0;
+ bool HasAux = false;
+ int ElementAux[PIC16Dbg::AuxSize] = { 0 };
+ std::string TagName = "";
+ std::string ElementName;
+ GlobalVariable *GV = Element.getGV();
+ DIDerivedType DITy(GV);
+ DITy.getName(ElementName);
+ unsigned short ElementSize = DITy.getSizeInBits()/8;
+ // Get mangled name for this structure/union element.
+ std::string MangMemName = ElementName + UniqueSuffix;
+ PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
+ short Class = 0;  // Initialized to avoid use-uninitialized for other tags.
+ if (CTy.getTag() == dwarf::DW_TAG_union_type)
+ Class = PIC16Dbg::C_MOU;
+ else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+ Class = PIC16Dbg::C_MOS;
+ EmitSymbol(MangMemName, Class, TypeNo, Value);
+ if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+ Value += ElementSize;
+ if (HasAux)
+ EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TagName);
+ }
+}
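One detail of the loop above that is easy to miss: Value is the member offset
handed to EmitSymbol, and it only advances for structures, so union members
all land at offset 0. A self-contained sketch of just that bookkeeping (the
member sizes are made up):

  #include <cassert>
  #include <vector>
  static std::vector<unsigned long> memberOffsets(bool IsStruct) {
    const unsigned short Sizes[3] = { 1, 2, 4 }; // hypothetical member sizes
    std::vector<unsigned long> Offsets;
    unsigned long Value = 0;
    for (unsigned i = 0; i != 3; ++i) {
      Offsets.push_back(Value);     // the offset EmitSymbol would receive
      if (IsStruct)
        Value += Sizes[i];          // unions never advance: all members at 0
    }
    return Offsets;                 // struct: {0, 1, 3}; union: {0, 0, 0}
  }
  int main() {
    assert(memberOffsets(true)[2] == 3);   // struct: cumulative offsets
    assert(memberOffsets(false)[2] == 0);  // union: everything at offset 0
    return 0;
  }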
+
+/// EmitCompositeTypeDecls - Emit composite type declarations like structure
+/// and union declarations.
+///
void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(),
E = M.getGlobalList().end(); I != E; I++) {
@@ -178,33 +346,10 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
// Emit auxiliary debug information for structure/union tag.
EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize);
- unsigned long Value = 0;
- DIArray Elements = CTy.getTypeArray();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
- DIDescriptor Element = Elements.getElement(i);
- unsigned short TypeNo = 0;
- bool HasAux = false;
- int ElementAux[PIC16Dbg::AuxSize] = { 0 };
- std::string TypeName = "";
- std::string ElementName;
- GlobalVariable *GV = Element.getGV();
- DIDerivedType DITy(GV);
- DITy.getName(ElementName);
- unsigned short ElementSize = DITy.getSizeInBits()/8;
- // Get mangleddd name for this structure/union element.
- std::string MangMemName = ElementName + UniqueSuffix;
- PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName);
- short Class;
- if( CTy.getTag() == dwarf::DW_TAG_union_type)
- Class = PIC16Dbg::C_MOU;
- else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
- Class = PIC16Dbg::C_MOS;
- EmitSymbol(MangMemName, Class, TypeNo, Value);
- if (CTy.getTag() == dwarf::DW_TAG_structure_type)
- Value += ElementSize;
- if (HasAux)
- EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName);
- }
+
+ // Emit members.
+ EmitCompositeTypeElements (CTy, UniqueSuffix);
+
// Emit mangled Symbol for end of structure/union.
std::string EOSSymbol = ".eos" + UniqueSuffix;
EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS);
@@ -214,6 +359,8 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
}
}
+/// EmitFunctBeginDI - Emit .bf for function.
+///
void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
std::string FunctName = F->getName();
if (EmitDebugDirectives) {
@@ -221,16 +368,20 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
std::string BlockBeginSym = ".bb." + FunctName;
int BFAux[PIC16Dbg::AuxSize] = {0};
- BFAux[4] = FunctBeginLine;
- BFAux[5] = FunctBeginLine >> 8;
+ BFAux[4] = CurLine;
+ BFAux[5] = CurLine >> 8;
+
// Emit debug directives for beginning of function.
EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN);
EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize);
+
EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK);
EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize);
}
}
+/// EmitFunctEndDI - Emit .ef for function end.
+///
void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
std::string FunctName = F->getName();
if (EmitDebugDirectives) {
@@ -241,8 +392,8 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK);
int EFAux[PIC16Dbg::AuxSize] = {0};
// The 5th and 6th bytes stand for the line number.
- EFAux[4] = Line;
- EFAux[5] = Line >> 8;
+ EFAux[4] = CurLine;
+ EFAux[5] = CurLine >> 8;
EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize);
EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN);
EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize);
@@ -251,15 +402,18 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
/// EmitAuxEntry - Emit Auxiliary debug information.
///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num,
- std::string tag) {
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
+ std::string TagName) {
O << "\n\t.dim " << VarName << ", 1" ;
- if (tag != "")
- O << ", " << tag;
- for (int i = 0; i<num; i++)
+ // TagName is emitted only for structure/union objects.
+ if (TagName != "")
+ O << ", " << TagName;
+ for (int i = 0; i<Num; i++)
O << "," << Aux[i];
}
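For reference, the output shape this loop produces -- a runnable sketch with
made-up values; the tag field appears only for structure/union objects:

  #include <iostream>
  int main() {
    int Aux[6] = { 0, 0, 0, 0, 12, 0 };       // hypothetical aux bytes
    std::cout << "\n\t.dim myVar, 1, mytag";  // VarName, 1, optional TagName
    for (int i = 0; i < 6; i++)
      std::cout << "," << Aux[i];
    std::cout << "\n";  // prints: .dim myVar, 1, mytag,0,0,0,0,12,0
    return 0;
  }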
+/// EmitSymbol - Emit .def for a symbol. Value is offset for the member.
+///
void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
Type, unsigned long Value) {
O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = "
@@ -268,6 +422,8 @@ void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
O << ", value = " << Value;
}
+/// EmitVarDebugInfo - Emit debug information for all variables.
+///
void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables");
if (!Root)
@@ -283,47 +439,45 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
unsigned short TypeNo = 0;
bool HasAux = false;
int Aux[PIC16Dbg::AuxSize] = { 0 };
- std::string TypeName = "";
+ std::string TagName = "";
std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName();
- PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TypeName);
+ PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
// Emit debug info only if type information is available.
if (TypeNo != PIC16Dbg::T_NULL) {
O << "\n\t.type " << VarName << ", " << TypeNo;
- short ClassNo = getClass(DIGV);
+ short ClassNo = getStorageClass(DIGV);
O << "\n\t.class " << VarName << ", " << ClassNo;
if (HasAux)
- EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName);
+ EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
}
}
}
O << "\n";
}
-void PIC16DbgInfo::EmitFileDirective(Module &M) {
- GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
- if (CU) {
- EmitDebugDirectives = true;
- EmitFileDirective(CU, false);
- }
-}
+/// SwitchToCU - Switch to a new compilation unit.
+///
+void PIC16DbgInfo::SwitchToCU(GlobalVariable *CU) {
+ // Get the file path from CU.
+ DICompileUnit cu(CU);
+ std::string DirName, FileName;
+ std::string FilePath = cu.getDirectory(DirName) + "/" +
+ cu.getFilename(FileName);
-void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) {
- std::string Dir, FN;
- DICompileUnit DIUnit(CU);
- std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
- if ( File != CurFile ) {
- if (EmitEof)
- EmitEOF();
- O << "\n\t.file\t\"" << File << "\"\n" ;
- CurFile = File;
- }
+ // Nothing to do if the source file is still the same.
+ if (FilePath == CurFile) return;
+
+ // Else, close the current one and start a new one.
+ if (CurFile != "") O << "\n\t.eof";
+ O << "\n\t.file\t\"" << FilePath << "\"\n" ;
+ CurFile = FilePath;
+ CurLine = 0;
}
+/// EmitEOF - Emit .eof for end of file.
+///
void PIC16DbgInfo::EmitEOF() {
if (CurFile != "")
O << "\n\t.EOF";
}
-void PIC16DbgInfo::SetFunctBeginLine(unsigned line) {
- FunctBeginLine = line;
-}
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
index 9d503807b205..d126d851b50e 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.h
+++ b/lib/Target/PIC16/PIC16DebugInfo.h
@@ -20,6 +20,8 @@
#include <map>
namespace llvm {
+ class MachineFunction;
+ class DebugLoc;
namespace PIC16Dbg {
enum VarType {
T_NULL,
@@ -94,33 +96,64 @@ namespace llvm {
raw_ostream &O;
const TargetAsmInfo *TAI;
std::string CurFile;
+ unsigned CurLine;
+
// EmitDebugDirectives is set if debug information is available. Default
// value for it is false.
bool EmitDebugDirectives;
- unsigned FunctBeginLine;
+
public:
PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {
- CurFile = "";
+ CurFile = "";
+ CurLine = 0;
EmitDebugDirectives = false;
}
- void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux,
+
+ void BeginModule (Module &M);
+ void BeginFunction (const MachineFunction &MF);
+ void ChangeDebugLoc (const MachineFunction &MF, const DebugLoc &DL,
+ bool IsInBeginFunction = false);
+ void EndFunction (const MachineFunction &MF);
+ void EndModule (Module &M);
+
+
+ private:
+ void SwitchToCU (GlobalVariable *CU);
+ void SwitchToLine (unsigned Line, bool IsInBeginFunction = false);
+
+ void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux,
int Aux[], std::string &TypeName);
- unsigned GetTypeDebugNumber(std::string &type);
- short getClass(DIGlobalVariable DIGV);
+ void PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo);
+ void PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName);
+
+ void PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName);
+ void PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName);
+
+ void PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName);
+ void PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo);
+
+ unsigned GetTypeDebugNumber(std::string &Type);
+ short getStorageClass(DIGlobalVariable DIGV);
void EmitFunctBeginDI(const Function *F);
- void Init(Module &M);
void EmitCompositeTypeDecls(Module &M);
+ void EmitCompositeTypeElements (DICompositeType CTy,
+ std::string UniqueSuffix);
void EmitFunctEndDI(const Function *F, unsigned Line);
void EmitAuxEntry(const std::string VarName, int Aux[],
- int num = PIC16Dbg::AuxSize, std::string tag = "");
+ int num = PIC16Dbg::AuxSize, std::string TagName = "");
inline void EmitSymbol(std::string Name, short Class,
unsigned short Type = PIC16Dbg::T_NULL,
unsigned long Value = 0);
void EmitVarDebugInfo(Module &M);
- void EmitFileDirective(Module &M);
- void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true);
void EmitEOF();
- void SetFunctBeginLine(unsigned line);
};
} // end namespace llvm;
#endif
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index ba465f3fcc7a..f113a483d692 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -31,42 +31,72 @@ static const char *getIntrinsicName(unsigned opcode) {
std::string Basename;
switch(opcode) {
default: assert (0 && "do not know intrinsic name");
+ // Arithmetic Right shift for integer types.
case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
case RTLIB::SRA_I16: Basename = "sra.i16"; break;
case RTLIB::SRA_I32: Basename = "sra.i32"; break;
+ // Left shift for integer types.
case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
case RTLIB::SHL_I16: Basename = "sll.i16"; break;
case RTLIB::SHL_I32: Basename = "sll.i32"; break;
+ // Logical Right Shift for integer types.
case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
case RTLIB::SRL_I16: Basename = "srl.i16"; break;
case RTLIB::SRL_I32: Basename = "srl.i32"; break;
+ // Multiply for integer types.
case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
case RTLIB::MUL_I16: Basename = "mul.i16"; break;
case RTLIB::MUL_I32: Basename = "mul.i32"; break;
+ // Signed division for integers.
case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
+
+ // Unsigned division for integers.
case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
+ // Signed modulus for integers.
case RTLIB::SREM_I16: Basename = "srem.i16"; break;
case RTLIB::SREM_I32: Basename = "srem.i32"; break;
+
+ // Unsigned modulus for integers.
case RTLIB::UREM_I16: Basename = "urem.i16"; break;
case RTLIB::UREM_I32: Basename = "urem.i32"; break;
- case RTLIB::FPTOSINT_F32_I32:
- Basename = "f32_to_si32"; break;
- case RTLIB::SINTTOFP_I32_F32:
- Basename = "si32_to_f32"; break;
+ //////////////////////
+ // LIBCALLS FOR FLOATS
+ //////////////////////
+
+ // Float to signed int conversions.
+ case RTLIB::FPTOSINT_F32_I8: Basename = "f32_to_si32"; break;
+ case RTLIB::FPTOSINT_F32_I16: Basename = "f32_to_si32"; break;
+ case RTLIB::FPTOSINT_F32_I32: Basename = "f32_to_si32"; break;
+
+ // Signed ints to float. char and int are first sign extended to i32
+ // before being converted to float, so an I8_F32 or I16_F32 isn't required.
+ case RTLIB::SINTTOFP_I32_F32: Basename = "si32_to_f32"; break;
+
+ // Float to Unsigned conversions.
+ // Signed conversion can be used for unsigned conversion as well.
+ // In signed and unsigned versions only the interpretation of the
+ // MSB is different. Bit representation remains the same.
+ case RTLIB::FPTOUINT_F32_I8: Basename = "f32_to_si32"; break;
+ case RTLIB::FPTOUINT_F32_I16: Basename = "f32_to_si32"; break;
+ case RTLIB::FPTOUINT_F32_I32: Basename = "f32_to_si32"; break;
+
+ // Unsigned to Float conversions. char and int are first zero extended
+ // before being converted to float.
+ case RTLIB::UINTTOFP_I32_F32: Basename = "ui32_to_f32"; break;
+ // Floating point add, sub, mul, div.
case RTLIB::ADD_F32: Basename = "add.f32"; break;
case RTLIB::SUB_F32: Basename = "sub.f32"; break;
case RTLIB::MUL_F32: Basename = "mul.f32"; break;
case RTLIB::DIV_F32: Basename = "div.f32"; break;
-
}
std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
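The claim in the comments above -- that the signed conversion routine doubles
as the unsigned one because only the reading of the MSB differs -- can be
sanity-checked in plain C++ for values that fit both ranges (an editorial
sketch, independent of any PIC16 code; f32_to_si32 is only named for context):

  #include <cassert>
  #include <cstdint>
  int main() {
    float F = 200.5f;
    int32_t  S = (int32_t)F;   // what an f32_to_si32 routine would compute
    uint32_t U = (uint32_t)S;  // same bit pattern, just read as unsigned
    assert(U == (uint32_t)F);  // agrees with a direct unsigned conversion
    return 0;
  }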
@@ -83,7 +113,7 @@ static const char *getIntrinsicName(unsigned opcode) {
// PIC16TargetLowering Constructor.
PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
: TargetLowering(TM), TmpSize(0) {
-
+
Subtarget = &TM.getSubtarget<PIC16Subtarget>();
addRegisterClass(MVT::i8, PIC16::GPRRegisterClass);
@@ -114,6 +144,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
// Signed division lib call names
setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16));
setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32));
+
// Unsigned division lib call names
setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16));
setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32));
@@ -121,15 +152,36 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
// Signed remainder lib call names
setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16));
setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32));
+
// Unsigned remainder lib call names
setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
- // Floating point operations
+ // Floating point to signed int conversions.
+ setLibcallName(RTLIB::FPTOSINT_F32_I8,
+ getIntrinsicName(RTLIB::FPTOSINT_F32_I8));
+ setLibcallName(RTLIB::FPTOSINT_F32_I16,
+ getIntrinsicName(RTLIB::FPTOSINT_F32_I16));
setLibcallName(RTLIB::FPTOSINT_F32_I32,
getIntrinsicName(RTLIB::FPTOSINT_F32_I32));
+
+ // Signed int to floats.
setLibcallName(RTLIB::SINTTOFP_I32_F32,
getIntrinsicName(RTLIB::SINTTOFP_I32_F32));
+
+ // Floating points to unsigned ints.
+ setLibcallName(RTLIB::FPTOUINT_F32_I8,
+ getIntrinsicName(RTLIB::FPTOUINT_F32_I8));
+ setLibcallName(RTLIB::FPTOUINT_F32_I16,
+ getIntrinsicName(RTLIB::FPTOUINT_F32_I16));
+ setLibcallName(RTLIB::FPTOUINT_F32_I32,
+ getIntrinsicName(RTLIB::FPTOUINT_F32_I32));
+
+ // Unsigned int to floats.
+ setLibcallName(RTLIB::UINTTOFP_I32_F32,
+ getIntrinsicName(RTLIB::UINTTOFP_I32_F32));
+
+ // Floating point add, sub, mul, div.
setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32));
setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32));
setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index bda632608ea7..d4f46a494498 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -37,6 +37,11 @@ X("pic16", "PIC16 14-bit [experimental].");
static RegisterTarget<CooperTargetMachine>
Y("cooper", "PIC16 Cooper [experimental].");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializePIC16Target() { }
+}
+
// PIC16TargetMachine - Traditional PIC16 Machine.
PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
bool Cooper)
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index 7723982f69ad..c7bfb6d1a1a3 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -646,19 +646,10 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
bool PPCLinuxAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
-
- // Emit initial debug information.
+ DW = getAnalysisIfAvailable<DwarfWriter>();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
assert(MMI);
- DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "DwarfWriter is not available");
- DW->BeginModule(&M, MMI, O, this, TAI);
-
- // GNU as handles section names wrapped in quotes
- Mang->setUseQuotes(true);
-
SwitchToSection(TAI->getTextSection());
-
return Result;
}
@@ -875,18 +866,9 @@ bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
O << "\t.machine " << CPUDirectives[Directive] << '\n';
bool Result = AsmPrinter::doInitialization(M);
-
- // Emit initial debug information.
- // We need this for Personality functions.
- // AsmPrinter::doInitialization should have done this analysis.
+ DW = getAnalysisIfAvailable<DwarfWriter>();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
assert(MMI);
- DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "DwarfWriter is not available");
- DW->BeginModule(&M, MMI, O, this, TAI);
-
- // Darwin wants symbols to be quoted if they have complex names.
- Mang->setUseQuotes(true);
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -1202,3 +1184,9 @@ namespace {
extern "C" int PowerPCAsmPrinterForceLink;
int PowerPCAsmPrinterForceLink = 0;
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializePowerPCAsmPrinter() { }
+}
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
index c69e591a6632..ebffd693d644 100644
--- a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
@@ -19,59 +19,19 @@
using namespace llvm;
using namespace llvm::dwarf;
-PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM):
+PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM) :
PPCTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
PCSymbol = ".";
CommentString = ";";
- GlobalPrefix = "_";
- PrivateGlobalPrefix = "L";
- LessPrivateGlobalPrefix = "l";
- StringConstantPrefix = "\1LC";
ConstantPoolSection = "\t.const\t";
- JumpTableDataSection = ".const";
- CStringSection = "\t.cstring";
- if (TM.getRelocationModel() == Reloc::Static) {
- StaticCtorsSection = ".constructor";
- StaticDtorsSection = ".destructor";
- } else {
- StaticCtorsSection = ".mod_init_func";
- StaticDtorsSection = ".mod_term_func";
- }
- HasSingleParameterDotFile = false;
- SwitchToSectionDirective = "\t.section ";
UsedDirective = "\t.no_dead_strip\t";
- WeakDefDirective = "\t.weak_definition ";
- WeakRefDirective = "\t.weak_reference ";
- HiddenDirective = "\t.private_extern ";
SupportsExceptionHandling = true;
- NeedsIndirectEncoding = true;
- NeedsSet = true;
- BSSSection = 0;
DwarfEHFrameSection =
- ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+ ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
GlobalEHDirective = "\t.globl\t";
SupportsWeakOmittedEHFrame = false;
-
- DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
- DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
- DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
- DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
- DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
- DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
- DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
- DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
- DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
- DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
- DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
-
- // In non-PIC modes, emit a special label before jump tables so that the
- // linker can perform more accurate dead code stripping.
- if (TM.getRelocationModel() != Reloc::PIC_) {
- // Emit a local label that is preserved until the linker runs.
- JumpTableSpecialLabelPrefix = "l";
- }
}
/// PreferredEHDataFormat - This hook allows the target to select data
@@ -131,7 +91,7 @@ PPCLinuxTargetAsmInfo::PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM) :
DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
PCSymbol = ".";
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index ef3f0fc04219..3e89885a77eb 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -35,6 +35,11 @@ X("ppc32", "PowerPC 32");
static RegisterTarget<PPC64TargetMachine>
Y("ppc64", "PowerPC 64");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializePowerPCTarget() { }
+}
+
// No assembler printer by default
PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0;
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 61707f5556fb..6a2fdcac01d7 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "Sparc.h"
#include "SparcInstrInfo.h"
+#include "SparcTargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
@@ -24,8 +25,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
@@ -88,6 +87,7 @@ FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o,
return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
}
+
/// runOnMachineFunction - This uses the printInstruction()
/// method to print assembly for each instruction.
///
@@ -353,3 +353,17 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+
+namespace {
+ static struct Register {
+ Register() {
+ SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass);
+ }
+ } Registrator;
+}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeSparcAsmPrinter() { }
+}
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index eda030924100..fd0f12429a04 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -18,17 +18,18 @@
#include "llvm/Target/TargetMachineRegistry.h"
using namespace llvm;
-/// SparcTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int SparcTargetMachineModule;
-int SparcTargetMachineModule = 0;
-
// Register the target.
static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
+// No assembler printer by default
+SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0;
+
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeSparcTarget() { }
+}
+
const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
// FIXME: Handle Solaris subtarget someday :)
return new SparcELFTargetAsmInfo(*this);
@@ -89,6 +90,8 @@ bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
bool Verbose,
raw_ostream &Out) {
// Output assembly language.
- PM.add(createSparcCodePrinterPass(Out, *this, OptLevel, Verbose));
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 40b44f2fb34d..8afcc73f502e 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -35,6 +35,14 @@ class SparcTargetMachine : public LLVMTargetMachine {
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
+ // To avoid having the target depend on the asmprinter libraries, the
+ // asmprinter sets this ctor function pointer at startup if it is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
SparcTargetMachine(const Module &M, const std::string &FS);
@@ -56,6 +64,10 @@ public:
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose, raw_ostream &Out);
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
};
} // end namespace llvm
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
index 6a2de6f582a6..3f5f1bd3eb26 100644
--- a/lib/Target/TargetAsmInfo.cpp
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -24,10 +24,10 @@
#include "llvm/Support/Dwarf.h"
#include <cctype>
#include <cstring>
-
using namespace llvm;
-void TargetAsmInfo::fillDefaultValues() {
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm)
+: TM(tm) {
BSSSection = "\t.bss";
BSSSection_ = 0;
ReadOnlySection = 0;
@@ -58,6 +58,7 @@ void TargetAsmInfo::fillDefaultValues() {
InlineAsmEnd = "#NO_APP";
AssemblerDialect = 0;
StringConstantPrefix = ".str";
+ AllowQuotesInName = false;
ZeroDirective = "\t.zero\t";
ZeroDirectiveSuffix = 0;
AsciiDirective = "\t.ascii\t";
@@ -102,7 +103,6 @@ void TargetAsmInfo::fillDefaultValues() {
SupportsExceptionHandling = false;
DwarfRequiresFrameSection = true;
DwarfUsesInlineInfoSection = false;
- SupportsMacInfoSection = true;
NonLocalEHFrameLabel = false;
GlobalEHDirective = 0;
SupportsWeakOmittedEHFrame = true;
@@ -118,7 +118,7 @@ void TargetAsmInfo::fillDefaultValues() {
DwarfLocSection = ".debug_loc";
DwarfARangesSection = ".debug_aranges";
DwarfRangesSection = ".debug_ranges";
- DwarfMacInfoSection = ".debug_macinfo";
+ DwarfMacroInfoSection = ".debug_macinfo";
DwarfEHFrameSection = ".eh_frame";
DwarfExceptionSection = ".gcc_except_table";
AsmTransCBE = 0;
@@ -126,11 +126,6 @@ void TargetAsmInfo::fillDefaultValues() {
DataSection = getUnnamedSection("\t.data", SectionFlags::Writeable);
}
-TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm)
- : TM(tm) {
- fillDefaultValues();
-}
-
TargetAsmInfo::~TargetAsmInfo() {
}
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index dbd03d8b9633..368bcaa5ce34 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_partially_linked_object(LLVMX86AsmPrinter
X86ATTAsmPrinter.cpp
+ X86ATTInstPrinter.cpp
X86AsmPrinter.cpp
X86IntelAsmPrinter.cpp
)
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
index 8afe2ea9e10b..60ed4f0c946a 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -26,8 +26,10 @@
#include "llvm/Type.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmInfo.h"
@@ -36,6 +38,9 @@ using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
+static cl::opt<bool> NewAsmPrinter("experimental-asm-printer",
+ cl::Hidden);
+
static std::string getPICLabelString(unsigned FnNum,
const TargetAsmInfo *TAI,
const X86Subtarget* Subtarget) {
@@ -266,7 +271,7 @@ bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
// Emit post-function debug information.
- if (TAI->doesSupportDebugInformation())
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
DW->EndFunction(&MF);
// Print out jump tables referenced by the function.
@@ -291,6 +296,136 @@ static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) {
return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static;
}
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: assert(0 && "Unknown pcrel immediate operand");
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm);
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+ decorateName(Name, GV);
+
+ bool needCloseParen = false;
+ if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ if (shouldPrintStub(TM, Subtarget)) {
+ // Link-once, declaration, or Weakly-linked global variables need
+ // non-lazily-resolved stubs
+ if (GV->isDeclaration() || GV->isWeakForLinker()) {
+ // Dynamically-resolved functions need a stub for the function.
+ if (isa<Function>(GV)) {
+ // Function stubs are no longer needed for Mac OS X 10.5 and up.
+ if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) {
+ O << Name;
+ } else {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ }
+ } else if (GV->hasHiddenVisibility()) {
+ if (!GV->isDeclaration() && !GV->hasCommonLinkage())
+ // Definition is not definitely in the current translation unit.
+ O << Name;
+ else {
+ HiddenGVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ if (GV->hasDLLImportLinkage())
+ O << "__imp_";
+ O << Name;
+ }
+ } else {
+ if (GV->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name;
+
+ if (shouldPrintPLT(TM, Subtarget)) {
+ // Assemble call via PLT for externally visible symbols
+ if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+ !GV->hasLocalLinkage())
+ O << "@PLT";
+ }
+ if (Subtarget->isTargetCygMing() && GV->isDeclaration())
+ // Save function name for later type emission
+ FnStubs.insert(Name);
+ }
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ printOffset(MO.getOffset());
+
+ if (needCloseParen)
+ O << ')';
+ return;
+ }
+
+ case MachineOperand::MO_ExternalSymbol: {
+ bool needCloseParen = false;
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ // Print function stub suffix unless it's Mac OS X 10.5 and up.
+ if (shouldPrintStub(TM, Subtarget) &&
+ !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ return;
+ }
+
+ if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ O << Name;
+
+ if (shouldPrintPLT(TM, Subtarget)) {
+ std::string GOTName(TAI->getGlobalPrefix());
+ GOTName+="_GLOBAL_OFFSET_TABLE_";
+ if (Name == GOTName)
+ // HACK! Emit extra offset to PC during printing GOT offset to
+ // compensate for the size of popl instruction. The resulting code
+ // should look like:
+ // call .piclabel
+ // piclabel:
+ // popl %some_register
+ // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
+ O << " + [.-"
+ << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
+
+ O << "@PLT";
+ }
+
+ if (needCloseParen)
+ O << ')';
+
+ return;
+ }
+ }
+}
+
void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
const char *Modifier, bool NotRIPRel) {
const MachineOperand &MO = MI->getOperand(OpNo);
@@ -312,14 +447,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_Immediate:
if (!Modifier || (strcmp(Modifier, "debug") &&
- strcmp(Modifier, "mem") &&
- strcmp(Modifier, "call")))
+ strcmp(Modifier, "mem")))
O << '$';
O << MO.getImm();
return;
- case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm);
- return;
case MachineOperand::MO_JumpTableIndex: {
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
if (!isMemOp) O << '$';
@@ -359,8 +490,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
return;
}
case MachineOperand::MO_GlobalAddress: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
- bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
bool needCloseParen = false;
const GlobalValue *GV = MO.getGlobal();
@@ -369,7 +499,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
}
bool isThreadLocal = GVar && GVar->isThreadLocal();
@@ -377,7 +507,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
std::string Name = Mang->getValueName(GV);
decorateName(Name, GV);
- if (!isMemOp && !isCallOp)
+ if (!isMemOp)
O << '$';
else if (Name[0] == '$') {
// The name begins with a dollar-sign. In order to avoid having it look
@@ -391,15 +521,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// non-lazily-resolved stubs
if (GV->isDeclaration() || GV->isWeakForLinker()) {
// Dynamically-resolved functions need a stub for the function.
- if (isCallOp && isa<Function>(GV)) {
- // Function stubs are no longer needed for Mac OS X 10.5 and up.
- if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) {
- O << Name;
- } else {
- FnStubs.insert(Name);
- printSuffixedName(Name, "$stub");
- }
- } else if (GV->hasHiddenVisibility()) {
+ if (GV->hasHiddenVisibility()) {
if (!GV->isDeclaration() && !GV->hasCommonLinkage())
// Definition is not definitely in the current translation unit.
O << Name;
@@ -417,25 +539,12 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << Name;
}
- if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
+ if (TM.getRelocationModel() == Reloc::PIC_)
O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget);
} else {
- if (GV->hasDLLImportLinkage()) {
+ if (GV->hasDLLImportLinkage())
O << "__imp_";
- }
O << Name;
-
- if (isCallOp) {
- if (shouldPrintPLT(TM, Subtarget)) {
- // Assemble call via PLT for externally visible symbols
- if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
- !GV->hasLocalLinkage())
- O << "@PLT";
- }
- if (Subtarget->isTargetCygMing() && GV->isDeclaration())
- // Save function name for later type emission
- FnStubs.insert(Name);
- }
}
if (GV->hasExternalWeakLinkage())
@@ -443,6 +552,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
printOffset(MO.getOffset());
+ if (needCloseParen)
+ O << ')';
+
+ bool isRIPRelative = false;
if (isThreadLocal) {
TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel());
switch (model) {
@@ -456,7 +569,8 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case TLSModel::InitialExec:
if (Subtarget->is64Bit()) {
assert (!NotRIPRel);
- O << "@GOTTPOFF(%rip)";
+ O << "@GOTTPOFF";
+ isRIPRelative = true;
} else {
O << "@INDNTPOFF";
}
@@ -476,43 +590,33 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << "@GOT";
else
O << "@GOTOFF";
- } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) {
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ !NotRIPRel) {
if (TM.getRelocationModel() != Reloc::Static) {
if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
O << "@GOTPCREL";
-
- if (needCloseParen) {
- needCloseParen = false;
- O << ')';
- }
}
-
- // Use rip when possible to reduce code size, except when
- // index or base register are also part of the address. e.g.
- // foo(%rip)(%rcx,%rax,4) is not legal
- O << "(%rip)";
+
+ isRIPRelative = true;
}
}
- if (needCloseParen)
- O << ')';
-
+ // Use rip when possible to reduce code size, except when
+ // index or base register are also part of the address. e.g.
+ // foo(%rip)(%rcx,%rax,4) is not legal.
+ if (isRIPRelative)
+ O << "(%rip)";
+
return;
}
case MachineOperand::MO_ExternalSymbol: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
bool needCloseParen = false;
std::string Name(TAI->getGlobalPrefix());
Name += MO.getSymbolName();
+
// Print function stub suffix unless it's Mac OS X 10.5 and up.
- if (isCallOp && shouldPrintStub(TM, Subtarget) &&
- !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
- FnStubs.insert(Name);
- printSuffixedName(Name, "$stub");
- return;
- }
- if (!isMemOp && !isCallOp)
+ if (!isMemOp)
O << '$';
else if (Name[0] == '$') {
// The name begins with a dollar-sign. In order to avoid having it look
@@ -536,17 +640,13 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
O << " + [.-"
<< getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
-
- if (isCallOp)
- O << "@PLT";
}
if (needCloseParen)
O << ')';
- if (!isCallOp && Subtarget->isPICStyleRIPRel())
+ if (Subtarget->isPICStyleRIPRel())
O << "(%rip)";
-
return;
}
default:
@@ -673,8 +773,7 @@ void X86ATTAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
printBasicBlockLabel(MBB, false, false, false);
}
-bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO,
- const char Mode) {
+bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) {
unsigned Reg = MO.getReg();
switch (Mode) {
default: return true; // Unknown mode.
@@ -758,38 +857,85 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+ // Convert registers in the addr mode according to subreg64.
+ for (unsigned i = 0; i != 4; ++i) {
+ if (!MI->getOperand(OpNo+i).isReg()) continue;
+
+ unsigned Reg = MI->getOperand(OpNo+i).getReg();
+ if (Reg == 0) continue;
+
+ MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+ }
+}
+
/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in
/// AT&T syntax to the current output stream.
///
void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
++EmittedInsts;
+ if (NewAsmPrinter) {
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+ O << "\t";
+ printInlineAsm(MI);
+ return;
+ } else if (MI->isLabel()) {
+ printLabel(MI);
+ return;
+ } else if (MI->getOpcode() == TargetInstrInfo::DECLARE) {
+ printDeclare(MI);
+ return;
+ } else if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ printImplicitDef(MI);
+ return;
+ }
+
+ O << "NEW: ";
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (MO.isReg()) {
+ MCOp.MakeReg(MO.getReg());
+ } else if (MO.isImm()) {
+ MCOp.MakeImm(MO.getImm());
+ } else if (MO.isMBB()) {
+ MCOp.MakeMBBLabel(getFunctionNumber(), MO.getMBB()->getNumber());
+ } else {
+ assert(0 && "Unimp");
+ }
+
+ TmpInst.addOperand(MCOp);
+ }
+
+ switch (TmpInst.getOpcode()) {
+ case X86::LEA64_32r:
+ // Handle the 'subreg rewriting' for the lea64_32mem operand.
+ lower_lea64_32mem(&TmpInst, 1);
+ break;
+ }
+
+ // FIXME: Convert TmpInst.
+ printInstruction(&TmpInst);
+ O << "OLD: ";
+ }
+
// Call the autogenerated instruction printer routines.
printInstruction(MI);
}
/// doInitialization
bool X86ATTAsmPrinter::doInitialization(Module &M) {
-
- bool Result = AsmPrinter::doInitialization(M);
-
- if (TAI->doesSupportDebugInformation()) {
- // Let PassManager know we need debug information and relay
- // the MachineModuleInfo address on to DwarfWriter.
- // AsmPrinter::doInitialization did this analysis.
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- DW = getAnalysisIfAvailable<DwarfWriter>();
- DW->BeginModule(&M, MMI, O, this, TAI);
- }
-
- // Darwin wants symbols to be quoted if they have complex names.
- if (Subtarget->isTargetDarwin())
- Mang->setUseQuotes(true);
-
- return Result;
+ return AsmPrinter::doInitialization(M);
}
-
void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
const TargetData *TD = TM.getTargetData();
@@ -1040,8 +1186,8 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
}
// Emit final debug information.
- DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
- DW->EndModule();
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ DW->EndModule();
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
@@ -1060,12 +1206,12 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
}
// Emit final debug information.
- DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
- DW->EndModule();
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ DW->EndModule();
} else if (Subtarget->isTargetELF()) {
// Emit final debug information.
- DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
- DW->EndModule();
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ DW->EndModule();
}
return AsmPrinter::doFinalization(M);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
index 5b40e73bcb66..68a6bc80cebb 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -27,16 +27,16 @@
namespace llvm {
class MachineJumpTableInfo;
+class MCInst;
class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
- DwarfWriter *DW;
MachineModuleInfo *MMI;
const X86Subtarget *Subtarget;
public:
explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
const TargetAsmInfo *T, CodeGenOpt::Level OL,
bool V)
- : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {
+ : AsmPrinter(O, TM, T, OL, V), MMI(0) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
@@ -63,10 +63,62 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
/// machine instruction was sufficiently described to print it, otherwise it
/// returns false.
bool printInstruction(const MachineInstr *MI);
+
+
+ // New MCInst printing stuff.
+ bool printInstruction(const MCInst *MI);
+
+ void printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier = 0, bool NotRIPRel = false);
+ void printMemReference(const MCInst *MI, unsigned Op);
+ void printLeaMemReference(const MCInst *MI, unsigned Op);
+ void printSSECC(const MCInst *MI, unsigned Op);
+ void printPICLabel(const MCInst *MI, unsigned Op);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo);
+
+ void printi8mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+
+
// These methods are used by the tablegen'erated instruction printer.
void printOperand(const MachineInstr *MI, unsigned OpNo,
const char *Modifier = 0, bool NotRIPRel = false);
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
void printi8mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
@@ -104,7 +156,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
printLeaMemReference(MI, OpNo, "subreg64");
}
- bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+ bool printAsmMRegister(const MachineOperand &MO, char Mode);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
new file mode 100644
index 000000000000..9d50edcd6380
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -0,0 +1,143 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/MC/MCInst.h"
+#include "X86ATTAsmPrinter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define MachineInstr MCInst
+#define NO_ASM_WRITER_BOILERPLATE
+#include "X86GenAsmWriter.inc"
+#undef MachineInstr
+
+void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: assert(0 && "Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+
+void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) {
+ assert(0 &&
+ "This is only used for MOVPC32r, should lower before asm printing!");
+}
+
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+
+ if (Op.isImm())
+ O << Op.getImm();
+ else if (Op.isMBBLabel())
+ // FIXME: Keep in sync with printBasicBlockLabel. printBasicBlockLabel
+ // should eventually call into this code, not the other way around.
+ O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction()
+ << '_' << Op.getMBBLabelBlock();
+ else
+ assert(0 && "Unknown pcrel immediate operand");
+}
+
+
+void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier, bool NotRIPRel) {
+ assert(Modifier == 0 && "Modifiers should not be used");
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << '%';
+ unsigned Reg = Op.getReg();
+#if 0
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+#endif
+ O << TRI->getAsmName(Reg);
+ return;
+ } else if (Op.isImm()) {
+ //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem")))
+ O << '$';
+ O << Op.getImm();
+ return;
+ }
+
+ O << "<<UNKNOWN OPERAND KIND>>";
+}
+
+void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
+ bool NotRIPRel = false;
+
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+
+ NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
+ if (DispSpec.isImm()) {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << DispVal;
+ } else {
+ abort();
+ //assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+ // DispSpec.isJTI() || DispSpec.isSymbol());
+ //printOperand(MI, Op+3, "mem", NotRIPRel);
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ // There are cases where we can end up with ESP/RSP in the indexreg slot.
+ // If this happens, swap the base/index register to support assemblers that
+ // don't work when the index is *SP.
+ // FIXME: REMOVE THIS.
+ assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP);
+
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
+void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) {
+ const MCOperand &Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op);
+}
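A note on the operand indexing used by the two memory printers above: they
read a five-slot x86 address bundle starting at Op. Spelled out as a
hypothetical struct (editorial; the real code indexes the MCInst directly and
this type exists nowhere in the tree):

  #include <cstdint>
  struct X86MemOperandLayout {
    unsigned Base;    // Op+0: base register, 0 if absent
    int64_t  Scale;   // Op+1: scale immediate (1, 2, 4 or 8)
    unsigned Index;   // Op+2: index register, 0 if absent
    int64_t  Disp;    // Op+3: displacement (an immediate in this printer)
    unsigned Segment; // Op+4: segment register; only printMemReference reads it
  };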
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index c874849dc18e..a39203b19c24 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -29,13 +29,11 @@ FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
bool verbose) {
const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
- if (Subtarget->isFlavorIntel()) {
+ if (Subtarget->isFlavorIntel())
return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(),
OptLevel, verbose);
- } else {
- return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
- OptLevel, verbose);
- }
+ return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
}
namespace {
@@ -48,3 +46,9 @@ namespace {
extern "C" int X86AsmPrinterForceLink;
int X86AsmPrinterForceLink = 0;
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeX86AsmPrinter() { }
+}
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
index 659934930d46..ceae7bebc237 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
@@ -223,9 +223,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
case MachineOperand::MO_Immediate:
O << MO.getImm();
return;
- case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB());
- return;
case MachineOperand::MO_JumpTableIndex: {
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
if (!isMemOp) O << "OFFSET ";
@@ -243,14 +240,13 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
return;
}
case MachineOperand::MO_GlobalAddress: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
GlobalValue *GV = MO.getGlobal();
std::string Name = Mang->getValueName(GV);
decorateName(Name, GV);
- if (!isMemOp && !isCallOp) O << "OFFSET ";
+ if (!isMemOp) O << "OFFSET ";
if (GV->hasDLLImportLinkage()) {
// FIXME: This should be fixed with full support of stdcall & fastcall
// CC's
@@ -261,8 +257,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
return;
}
case MachineOperand::MO_ExternalSymbol: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
- if (!isCallOp) O << "OFFSET ";
O << TAI->getGlobalPrefix() << MO.getSymbolName();
return;
}
@@ -271,6 +265,39 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
}
}
+void X86IntelAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo){
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: assert(0 && "Unknown pcrel immediate operand");
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+ decorateName(Name, GV);
+
+ if (GV->hasDLLImportLinkage()) {
+ // FIXME: This should be fixed with full support of stdcall & fastcall
+ // CC's
+ O << "__imp_";
+ }
+ O << Name;
+ printOffset(MO.getOffset());
+ return;
+ }
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+}
+
+
void X86IntelAsmPrinter::printLeaMemReference(const MachineInstr *MI,
unsigned Op,
const char *Modifier) {
@@ -339,8 +366,8 @@ void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
}
void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
- O << "\"L" << getFunctionNumber() << "$pb\"\n";
- O << "\"L" << getFunctionNumber() << "$pb\":";
+ O << "L" << getFunctionNumber() << "$pb\n";
+ O << "L" << getFunctionNumber() << "$pb:";
}
bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
@@ -362,7 +389,7 @@ bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
break;
}
- O << '%' << TRI->getName(Reg);
+ O << TRI->getName(Reg);
return false;
}
@@ -414,7 +441,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
Mang->markCharUnacceptable('.');
- O << "\t.686\n\t.model flat\n\n";
+ O << "\t.686\n\t.MMX\n\t.XMM\n\t.model flat\n\n";
// Emit declarations for external functions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -422,7 +449,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
std::string Name = Mang->getValueName(I);
decorateName(Name, I);
- O << "\textern " ;
+ O << "\tEXTERN " ;
if (I->hasDLLImportLinkage()) {
O << "__imp_";
}
@@ -436,7 +463,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
if (I->isDeclaration()) {
std::string Name = Mang->getValueName(I);
- O << "\textern " ;
+ O << "\tEXTERN " ;
if (I->hasDLLImportLinkage()) {
O << "__imp_";
}
@@ -471,14 +498,14 @@ bool X86IntelAsmPrinter::doFinalization(Module &M) {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
SwitchToDataSection("");
- O << name << "?\tsegment common 'COMMON'\n";
+ O << name << "?\tSEGMENT PARA common 'COMMON'\n";
bCustomSegment = true;
// FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
// are also available.
break;
case GlobalValue::AppendingLinkage:
SwitchToDataSection("");
- O << name << "?\tsegment public 'DATA'\n";
+ O << name << "?\tSEGMENT PARA public 'DATA'\n";
bCustomSegment = true;
// FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
// are also available.
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
index 9520d982f692..04f259551170 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -52,6 +52,9 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
printOp(MO, Modifier);
}
}
+
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
+
void printi8mem(const MachineInstr *MI, unsigned OpNo) {
O << "BYTE PTR ";
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 3796aac57cb5..4464878ce217 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1858,8 +1858,23 @@ Ideal output:
setne %al
ret
-We could do this transformation in instcombine, but it's only clearly
-beneficial on platforms with a test instruction.
+This should definitely be done in instcombine, canonicalizing the range
+condition into a != condition. We get this IR:
+
+define i32 @a(i32 %x) nounwind readnone {
+entry:
+ %0 = and i32 %x, 127 ; <i32> [#uses=1]
+ %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1]
+ %2 = zext i1 %1 to i32 ; <i32> [#uses=1]
+ ret i32 %2
+}
+
+Instcombine prefers to strength-reduce relational comparisons to equality
+comparisons when possible; this should be another case of that. This could
+be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it
+looks like InstCombiner::visitICmpInstWithInstAndIntCst really should be
+redesigned to use ComputeMaskedBits and friends.
+
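+After canonicalization, the IR for @a would presumably look something like
+this (hand-written sketch, not actual compiler output):
+
+define i32 @a(i32 %x) nounwind readnone {
+entry:
+  %0 = and i32 %x, 96               ; bits 5 and 6: (x & 127) > 31 iff either is set
+  %1 = icmp ne i32 %0, 0
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+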
//===---------------------------------------------------------------------===//
Testcase:
@@ -1880,20 +1895,40 @@ Ideal output:
Testcase:
int x(int a) { return (a & 0x80) ? 0x100 : 0; }
+int y(int a) { return (a & 0x80) * 2; }
-Current output:
+Current:
testl $128, 4(%esp)
setne %al
movzbl %al, %eax
shll $8, %eax
ret
-Ideal output:
+Better:
movl 4(%esp), %eax
addl %eax, %eax
andl $256, %eax
ret
-We generally want to fold shifted tests of a single bit into a shift+and on x86.
+This is another general instcombine transformation that is profitable on all
+targets. In LLVM IR, these functions look like this:
+
+define i32 @x(i32 %a) nounwind readnone {
+entry:
+ %0 = and i32 %a, 128
+ %1 = icmp eq i32 %0, 0
+ %iftmp.0.0 = select i1 %1, i32 0, i32 256
+ ret i32 %iftmp.0.0
+}
+
+define i32 @y(i32 %a) nounwind readnone {
+entry:
+ %0 = shl i32 %a, 1
+ %1 = and i32 %0, 256
+ ret i32 %1
+}
+
+Replacing an icmp+select with a shift should always be considered profitable in
+instcombine.
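+
+For reference, canonicalizing @x this way would leave it with the same body as
+@y (hand-written sketch, not actual compiler output):
+
+define i32 @x(i32 %a) nounwind readnone {
+entry:
+  %0 = shl i32 %a, 1                ; move bit 7 into bit 8
+  %1 = and i32 %0, 256
+  ret i32 %1
+}
+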
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 0f2fbcc98694..ed4eb444e9cf 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -991,8 +991,13 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::FpSET_ST0_32:
case X86::FpSET_ST0_64:
case X86::FpSET_ST0_80:
- assert((StackTop == 1 || StackTop == 2)
- && "Stack should have one or two element on it to return!");
+ // FpSET_ST0_80 is generated by copyRegToReg for both function return
+ // and inline assembly with the "st" constraint. In the latter case,
+ // it is possible for FP0 to be alive after this instruction.
+ if (!MI->killsRegister(X86::FP0)) {
+ // Duplicate ST0
+ duplicateToTop(0, 0, I);
+ }
--StackTop; // "Forget" we have something on the top of stack!
break;
case X86::FpSET_ST1_32:
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index b003efddd499..9cedafc8d934 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -167,6 +167,8 @@ namespace {
SDValue &Segment);
bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp);
+ bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp);
bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
SDValue N, SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
@@ -1293,6 +1295,32 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
return false;
}
+/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp) {
+ assert(Op.getOpcode() == X86ISD::TLSADDR);
+ assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+
+ X86ISelAddressMode AM;
+ AM.GV = GA->getGlobal();
+ AM.Disp += GA->getOffset();
+ AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
+
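+  // In 32-bit mode the TLS address is computed relative to the GOT base
+  // register (EBX); in 64-bit mode it is RIP-relative, so no index is needed.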
+ if (N.getValueType() == MVT::i32) {
+ AM.Scale = 1;
+ AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
+ } else {
+ AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
+ }
+
+ SDValue Segment;
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+
bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 36e3ab2e82ac..8d0ea662dc0e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -788,8 +788,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
- setOperationAction(ISD::UMULO, MVT::i32, Custom);
- setOperationAction(ISD::UMULO, MVT::i64, Custom);
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
@@ -4439,9 +4437,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
- GA->getValueType(0),
- GA->getOffset());
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ GA->getOffset());
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec)
@@ -8474,6 +8471,14 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
return;
+ case 'K':
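+      // The 'K' constraint accepts constants that fit in a signed 8-bit immediate.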
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index dc15e4aa4ee9..063913f5ae85 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -19,6 +19,14 @@
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64>;
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "print_pcrel_imm";
+}
+
+
// 64-bits but only 8 bits are significant.
def i64i8imm : Operand<i64>;
@@ -29,6 +37,7 @@ def lea64mem : Operand<i64> {
def lea64_32mem : Operand<i32> {
let PrintMethod = "printlea64_32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
}
@@ -39,6 +48,9 @@ def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
[add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper],
[]>;
+def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments.
//
@@ -113,9 +125,9 @@ let isCall = 1 in
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
- def CALL64pcrel32 : I<0xE8, RawFrm,
- (outs), (ins i64i32imm:$dst, variable_ops),
- "call\t${dst:call}", []>,
+ def CALL64pcrel32 : Ii32<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call\t$dst", []>,
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
"call\t{*}$dst", [(X86call GR64:$dst)]>;
@@ -177,6 +189,15 @@ def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", []>;
}
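+
+// Opcode 0x6A pushes a sign-extended imm8; opcode 0x68 pushes an imm16/imm32
+// (imm16 via the operand-size prefix).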
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{q}\t$imm", []>;
+}
+
let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W;
let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
@@ -1312,13 +1333,13 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym),
+def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
".byte\t0x66; "
- "leaq\t${sym:mem}(%rip), %rdi; "
+ "leaq\t$sym(%rip), %rdi; "
".word\t0x6666; "
"rex64; "
"call\t__tls_get_addr@PLT",
- [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+ [(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
let AddedComplexity = 5 in
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 50ae41764151..2d8f55f40c3a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -163,6 +163,11 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
// X86 Operand Definitions.
//
+def i32imm_pcrel : Operand<i32> {
+ let PrintMethod = "print_pcrel_imm";
+}
+
+
// *mem - Operand definitions for the funky X86 addressing mode operands.
//
class X86MemOperand<string printMethod> : Operand<iPTR> {
@@ -206,8 +211,10 @@ def i16i8imm : Operand<i16>;
// 32-bits but only 8 bits are significant.
def i32i8imm : Operand<i32>;
-// Branch targets have OtherVT type.
-def brtarget : Operand<OtherVT>;
+// Branch targets have OtherVT type and print as pc-relative values.
+def brtarget : Operand<OtherVT> {
+ let PrintMethod = "print_pcrel_imm";
+}
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
@@ -217,6 +224,8 @@ def brtarget : Operand<OtherVT>;
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
[add, sub, mul, shl, or, frameindex], []>;
+def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
@@ -561,8 +570,9 @@ let isCall = 1 in
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in {
- def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops),
- "call\t${dst:call}", []>;
+ def CALLpcrel32 : Ii32<0xE8, RawFrm,
+ (outs), (ins i32imm_pcrel:$dst,variable_ops),
+ "call\t$dst", []>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
"call\t{*}$dst", [(X86call GR32:$dst)]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
@@ -587,7 +597,7 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
- def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
+ def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL",
@@ -611,6 +621,15 @@ let mayStore = 1 in
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
}
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+ "push{l}\t$imm", []>;
+def PUSH32i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{l}\t$imm", []>;
+def PUSH32i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{l}\t$imm", []>;
+}
+
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in
def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf", []>;
let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
@@ -1726,13 +1745,13 @@ let isTwoAddress = 0 in {
let Defs = [EFLAGS] in {
let Uses = [CL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
- "shl{b}\t{%cl, $dst|$dst, %CL}",
+ "shl{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (shl GR8:$src, CL))]>;
def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
- "shl{w}\t{%cl, $dst|$dst, %CL}",
+ "shl{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
- "shl{l}\t{%cl, $dst|$dst, %CL}",
+ "shl{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (shl GR32:$src, CL))]>;
} // Uses = [CL]
@@ -1753,13 +1772,13 @@ def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t{%cl, $dst|$dst, %CL}",
+ "shl{b}\t{%cl, $dst|$dst, CL}",
[(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t{%cl, $dst|$dst, %CL}",
+ "shl{w}\t{%cl, $dst|$dst, CL}",
[(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t{%cl, $dst|$dst, %CL}",
+ "shl{l}\t{%cl, $dst|$dst, CL}",
[(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1788,13 +1807,13 @@ let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
- "shr{b}\t{%cl, $dst|$dst, %CL}",
+ "shr{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (srl GR8:$src, CL))]>;
def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
- "shr{w}\t{%cl, $dst|$dst, %CL}",
+ "shr{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
- "shr{l}\t{%cl, $dst|$dst, %CL}",
+ "shr{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (srl GR32:$src, CL))]>;
}
@@ -1822,14 +1841,14 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t{%cl, $dst|$dst, %CL}",
+ "shr{b}\t{%cl, $dst|$dst, CL}",
[(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t{%cl, $dst|$dst, %CL}",
+ "shr{w}\t{%cl, $dst|$dst, CL}",
[(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
OpSize;
def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t{%cl, $dst|$dst, %CL}",
+ "shr{l}\t{%cl, $dst|$dst, CL}",
[(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1857,13 +1876,13 @@ let isTwoAddress = 0 in {
let Uses = [CL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
- "sar{b}\t{%cl, $dst|$dst, %CL}",
+ "sar{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (sra GR8:$src, CL))]>;
def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
- "sar{w}\t{%cl, $dst|$dst, %CL}",
+ "sar{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
- "sar{l}\t{%cl, $dst|$dst, %CL}",
+ "sar{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (sra GR32:$src, CL))]>;
}
@@ -1892,13 +1911,13 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t{%cl, $dst|$dst, %CL}",
+ "sar{b}\t{%cl, $dst|$dst, CL}",
[(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t{%cl, $dst|$dst, %CL}",
+ "sar{w}\t{%cl, $dst|$dst, CL}",
[(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t{%cl, $dst|$dst, %CL}",
+ "sar{l}\t{%cl, $dst|$dst, CL}",
[(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1929,13 +1948,13 @@ let isTwoAddress = 0 in {
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
- "rol{b}\t{%cl, $dst|$dst, %CL}",
+ "rol{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (rotl GR8:$src, CL))]>;
def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
- "rol{w}\t{%cl, $dst|$dst, %CL}",
+ "rol{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
- "rol{l}\t{%cl, $dst|$dst, %CL}",
+ "rol{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (rotl GR32:$src, CL))]>;
}
@@ -1963,13 +1982,13 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t{%cl, $dst|$dst, %CL}",
+ "rol{b}\t{%cl, $dst|$dst, CL}",
[(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t{%cl, $dst|$dst, %CL}",
+ "rol{w}\t{%cl, $dst|$dst, CL}",
[(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t{%cl, $dst|$dst, %CL}",
+ "rol{l}\t{%cl, $dst|$dst, CL}",
[(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1998,13 +2017,13 @@ let isTwoAddress = 0 in {
let Uses = [CL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
- "ror{b}\t{%cl, $dst|$dst, %CL}",
+ "ror{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (rotr GR8:$src, CL))]>;
def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
- "ror{w}\t{%cl, $dst|$dst, %CL}",
+ "ror{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
- "ror{l}\t{%cl, $dst|$dst, %CL}",
+ "ror{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (rotr GR32:$src, CL))]>;
}
@@ -2032,13 +2051,13 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t{%cl, $dst|$dst, %CL}",
+ "ror{b}\t{%cl, $dst|$dst, CL}",
[(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t{%cl, $dst|$dst, %CL}",
+ "ror{w}\t{%cl, $dst|$dst, CL}",
[(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t{%cl, $dst|$dst, %CL}",
+ "ror{l}\t{%cl, $dst|$dst, CL}",
[(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -2070,17 +2089,17 @@ let isTwoAddress = 0 in {
// Double shift instructions (generalizations of rotate)
let Uses = [CL] in {
def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
TB, OpSize;
def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
TB, OpSize;
}
@@ -2115,11 +2134,11 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)]>, TB;
def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)]>, TB;
}
@@ -2138,11 +2157,11 @@ let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
addr:$dst)]>, TB, OpSize;
def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
addr:$dst)]>, TB, OpSize;
}
@@ -3095,11 +3114,11 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP, EBX] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym),
- "leal\t${sym:mem}(,%ebx,1), %eax; "
+ Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym),
+ "leal\t$sym, %eax; "
"call\t___tls_get_addr@PLT",
- [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+ [(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
let AddedComplexity = 5 in
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b44c7a693ef7..5d6ef36414a5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3027,6 +3027,12 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
}
+// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+// fall back to this for SSE1)
+def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
+
// Set lowest element and zero upper elements.
let AddedComplexity = 15 in
def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 6c0074e62f2a..a2f319f9a7ab 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -662,6 +662,10 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
TargetFrameInfo::StackGrowsUp ?
TD->getPointerSize() : -TD->getPointerSize());
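+  // Record where the virtual frame pointer lives: in the frame register when
+  // we have one, otherwise in the stack pointer.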
+ MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+
if (StackSize) {
// Show update of SP.
if (hasFP(MF)) {
@@ -676,7 +680,7 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
}
} else {
- //FIXME: Verify & implement for FP
+ // FIXME: Verify & implement for FP
MachineLocation SPDst(StackPtr);
MachineLocation SPSrc(StackPtr, stackGrowth);
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
@@ -711,10 +715,6 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
MachineLocation FPSrc(FramePtr);
Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
}
-
- MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
}
@@ -729,8 +729,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
!Fn->doesNotThrow() ||
UnwindTablesMandatory;
- DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
- DebugLoc::getUnknownLoc());
+ DebugLoc DL;
// Prepare for frame info.
unsigned FrameLabelId = 0;
@@ -822,13 +821,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
- unsigned ReadyLabelId = 0;
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer is ready.
- ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
- }
-
// Skip the callee-saved push instructions.
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
@@ -891,8 +883,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
}
- if (needsFrameMoves)
+ if (needsFrameMoves) {
+ unsigned ReadyLabelId = 0;
+ // Mark effective beginning of when frame pointer is ready.
+ ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+ }
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 46476f20400a..694b0ebac867 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -173,7 +173,7 @@ public:
bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; }
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
- bool isPICStyleWinPIC() const { return PICStyle == PICStyles:: WinPIC; }
+ bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; }
/// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
unsigned getDarwinVers() const { return DarwinVers; }
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
index 5dda5f4e49b0..f49ca15a05a4 100644
--- a/lib/Target/X86/X86TargetAsmInfo.cpp
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -44,40 +44,25 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
AlignmentIsInBytes = false;
TextAlignFillValue = 0x90;
- GlobalPrefix = "_";
+
+
if (!is64Bit)
Data64bitsDirective = 0; // we can't emit a 64-bit unit
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
- PrivateGlobalPrefix = "L"; // Marker for constant pool idxs
- LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata
- BSSSection = 0; // no BSS section.
ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
if (TM.getRelocationModel() != Reloc::Static)
ConstantPoolSection = "\t.const_data";
else
ConstantPoolSection = "\t.const\n";
- JumpTableDataSection = "\t.const\n";
- CStringSection = "\t.cstring";
- // FIXME: Why don't always use this section?
- if (is64Bit) {
+ // FIXME: Why don't we always use this section?
+ if (is64Bit)
SixteenByteConstantSection = getUnnamedSection("\t.literal16\n",
SectionFlags::Mergeable);
- }
LCOMMDirective = "\t.lcomm\t";
- SwitchToSectionDirective = "\t.section ";
- StringConstantPrefix = "\1LC";
// Leopard and above support aligned common symbols.
COMMDirectiveTakesAlignment = (Subtarget->getDarwinVers() >= 9);
HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
NonLocalEHFrameLabel = true;
- if (TM.getRelocationModel() == Reloc::Static) {
- StaticCtorsSection = ".constructor";
- StaticDtorsSection = ".destructor";
- } else {
- StaticCtorsSection = ".mod_init_func";
- StaticDtorsSection = ".mod_term_func";
- }
if (is64Bit) {
PersonalityPrefix = "";
PersonalitySuffix = "+4@GOTPCREL";
@@ -85,40 +70,18 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
PersonalityPrefix = "L";
PersonalitySuffix = "$non_lazy_ptr";
}
- NeedsIndirectEncoding = true;
InlineAsmStart = "## InlineAsm Start";
InlineAsmEnd = "## InlineAsm End";
CommentString = "##";
SetDirective = "\t.set";
PCSymbol = ".";
UsedDirective = "\t.no_dead_strip\t";
- WeakDefDirective = "\t.weak_definition ";
- WeakRefDirective = "\t.weak_reference ";
- HiddenDirective = "\t.private_extern ";
ProtectedDirective = "\t.globl\t";
- // In non-PIC modes, emit a special label before jump tables so that the
- // linker can perform more accurate dead code stripping.
- if (TM.getRelocationModel() != Reloc::PIC_) {
- // Emit a local label that is preserved until the linker runs.
- JumpTableSpecialLabelPrefix = "l";
- }
-
SupportsDebugInformation = true;
- NeedsSet = true;
- DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
- DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
- DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
- DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
- DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
- DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+
DwarfDebugInlineSection = ".section __DWARF,__debug_inlined,regular,debug";
DwarfUsesInlineInfoSection = true;
- DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
- DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
- DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
- DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
- DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
// Exceptions handling
SupportsExceptionHandling = true;
@@ -176,7 +139,7 @@ X86ELFTargetAsmInfo::X86ELFTargetAsmInfo(const X86TargetMachine &TM):
DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
// Exceptions handling
SupportsExceptionHandling = true;
@@ -259,7 +222,7 @@ X86COFFTargetAsmInfo::X86COFFTargetAsmInfo(const X86TargetMachine &TM):
DwarfLocSection = "\t.section\t.debug_loc,\"dr\"";
DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\"";
DwarfRangesSection = "\t.section\t.debug_ranges,\"dr\"";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
}
unsigned
@@ -340,8 +303,11 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
GlobalPrefix = "_";
CommentString = ";";
+ InlineAsmStart = "; InlineAsm Start";
+ InlineAsmEnd = "; InlineAsm End";
+
PrivateGlobalPrefix = "$";
- AlignDirective = "\talign\t";
+ AlignDirective = "\tALIGN\t";
ZeroDirective = "\tdb\t";
ZeroDirectiveSuffix = " dup(0)";
AsciiDirective = "\tdb\t";
@@ -353,13 +319,15 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
HasDotTypeDotSizeDirective = false;
HasSingleParameterDotFile = false;
+ AlignmentIsInBytes = true;
+
TextSection = getUnnamedSection("_text", SectionFlags::Code);
DataSection = getUnnamedSection("_data", SectionFlags::Writeable);
JumpTableDataSection = NULL;
SwitchToSectionDirective = "";
- TextSectionStartSuffix = "\tsegment 'CODE'";
- DataSectionStartSuffix = "\tsegment 'DATA'";
+ TextSectionStartSuffix = "\tSEGMENT PARA 'CODE'";
+ DataSectionStartSuffix = "\tSEGMENT PARA 'DATA'";
SectionEndDirectiveSuffix = "\tends\n";
}
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index dfb055ff78c5..53c46c3595ed 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -36,6 +36,11 @@ X("x86", "32-bit X86: Pentium-Pro and above");
static RegisterTarget<X86_64TargetMachine>
Y("x86-64", "64-bit X86: EM64T and AMD64");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeX86Target() { }
+}
+
// No assembler printer by default
X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index c9a6d8afd316..ed4c10184323 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -428,6 +428,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
bool XCoreAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
if (!FileDirective.empty()) {
emitFileDirective(FileDirective);
@@ -449,11 +450,6 @@ bool XCoreAsmPrinter::doInitialization(Module &M) {
}
}
- // Emit initial debug information.
- DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "Dwarf Writer is not available");
- DW->BeginModule(&M, getAnalysisIfAvailable<MachineModuleInfo>(),
- O, this, TAI);
return Result;
}
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.cpp b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
index 55137621deae..59ad624052ad 100644
--- a/lib/Target/XCore/XCoreTargetAsmInfo.cpp
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
@@ -24,6 +24,7 @@ using namespace llvm;
XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
: ELFTargetAsmInfo(TM),
Subtarget(TM.getSubtargetImpl()) {
+ SupportsDebugInformation = true;
TextSection = getUnnamedSection("\t.text", SectionFlags::Code);
DataSection = getNamedSection("\t.dp.data", SectionFlags::Writeable |
SectionFlags::Small);
@@ -64,7 +65,7 @@ XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
}
const Section*
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 5437c574a80f..cfd3cd3fe3bf 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -31,6 +31,11 @@ namespace {
RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
}
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeXCoreTarget() { }
+}
+
const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
return new XCoreTargetAsmInfo(*this);
}
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 4b85e1388a68..1438b4879d2b 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMipo
LoopExtractor.cpp
LowerSetJmp.cpp
MergeFunctions.cpp
+ PartialInlining.cpp
PartialSpecialization.cpp
PruneEH.cpp
RaiseAllocations.cpp
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 9a1b29419077..cbf3a1d827a9 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1667,11 +1667,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
//
// NOTE: It doesn't make sense to promote non single-value types since we
// are just replacing static memory with stack memory.
+ //
+ // If the global is in a different address space, don't bring it to the stack.
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction && !GS.HasNonInstructionUser &&
GV->getType()->getElementType()->isSingleValueType() &&
GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage()) {
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
DOUT << "LOCALIZING GLOBAL: " << *GV;
Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
const Type* ElemTy = GV->getType()->getElementType();
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index b3a2554039a1..0b975ae49979 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -20,10 +20,13 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
using namespace llvm;
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
namespace {
struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
@@ -132,6 +135,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
duplicateFunction->replaceAllUsesWith(F);
duplicateFunction->eraseFromParent();
+ ++NumPartialInlined;
+
return extractedFunction;
}
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
index a81bbdb3c53d..8c97b5d17c08 100644
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -82,14 +82,14 @@ void RaiseAllocations::doInitialization(Module &M) {
// Check to see if we got the expected malloc
if (TyWeHave != Malloc1Type) {
- // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc
+ // Check to see if the prototype is wrong, giving us i8*(i32) * malloc
// This handles the common declaration of: 'void *malloc(unsigned);'
const FunctionType *Malloc2Type =
FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
std::vector<const Type*>(1, Type::Int32Ty), false);
if (TyWeHave != Malloc2Type) {
// Check to see if the prototype is missing, giving us
- // sbyte*(...) * malloc
+ // i8*(...) * malloc
// This handles the common declaration of: 'void *malloc();'
const FunctionType *Malloc3Type =
FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
index c6cf4dfd6ebf..b110f4eb368b 100644
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -108,9 +108,9 @@ namespace {
class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
GlobalVariable* Counter;
Value* ResetValue;
- const Type* T;
+ const IntegerType* T;
public:
- GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
+ GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval);
virtual ~GlobalRandomCounter();
virtual void PrepFunction(Function* F);
virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -121,9 +121,9 @@ namespace {
GlobalVariable* Counter;
Value* ResetValue;
AllocaInst* AI;
- const Type* T;
+ const IntegerType* T;
public:
- GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
+ GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval);
virtual ~GlobalRandomCounterOpt();
virtual void PrepFunction(Function* F);
virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -193,7 +193,7 @@ static void getBackEdges(Function& F, T& BackEdges);
// Methods of choosing when to profile
///////////////////////////////////////
-GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
+GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
uint64_t resetval) : T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
ResetValue = Init;
@@ -229,7 +229,7 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
ReplacePhiPred(oldnext, bb, resetblock);
}
-GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t,
+GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
uint64_t resetval)
: AI(0), T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 42978e753d16..e9bee6408fe3 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -401,8 +401,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, int->uint, or
-/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 673d38b7f3ae..f4a989844478 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cstdio>
using namespace llvm;
@@ -48,7 +49,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
-cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -952,8 +953,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].second.isClobber())
+ if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+ DEBUG(
+ DOUT << "GVN: non-local load ";
+ WriteAsOperand(*DOUT.stream(), LI);
+ DOUT << " is clobbered by " << *Deps[0].second.getInst();
+ );
return false;
+ }
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
@@ -1069,6 +1076,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
BasicBlock *TmpBB = LoadBB;
bool isSinglePred = false;
+ bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
@@ -1078,6 +1086,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
return false;
if (Blockers.count(TmpBB))
return false;
+ if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+ allSingleSucc = false;
}
assert(TmpBB);
@@ -1154,7 +1164,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
<< UnavailablePred->getName() << "': " << *LI);
return false;
}
-
+
+ // Make sure it is valid to move this load here. We have to watch out for:
+ // @1 = getelementptr (i8* p, ...
+ // test p and branch if == 0
+ // load @1
+ // It is valid to have the getelementptr before the test, even if p can be 0,
+ // as getelementptr only does address arithmetic.
+ // If we are not pushing the value through any multiple-successor blocks
+ // we do not have this case. Otherwise, check that the load is safe to
+ // put anywhere; this can be improved, but should be conservatively safe.
+ if (!allSingleSucc &&
+ !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
+ return false;
+
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 38b11985519f..326fb38909b5 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -17,7 +17,10 @@
// which starts at zero and steps by one.
// 2. The canonical induction variable is guaranteed to be the first PHI node
// in the loop header block.
-// 3. Any pointer arithmetic recurrences are raised to use array subscripts.
+// 3. The canonical induction variable is guaranteed to be in a wide enough
+// type so that IV expressions need not be (directly) zero-extended or
+// sign-extended.
+// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
@@ -296,11 +299,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// If this instruction is dead now, delete it.
RecursivelyDeleteTriviallyDeadInstructions(Inst);
- // See if this is a single-entry LCSSA PHI node. If so, we can (and
- // have to) remove
- // the PHI entirely. This is safe, because the NewVal won't be variant
+ // If we're inserting code into the exit block rather than the
+ // preheader, we can (and have to) remove the PHI entirely.
+ // This is safe, because the NewVal won't be variant
// in the loop, so we don't need an LCSSA phi node anymore.
- if (NumPreds == 1) {
+ if (ExitBlocks.size() == 1) {
PN->replaceAllUsesWith(ExitVal);
RecursivelyDeleteTriviallyDeadInstructions(PN);
break;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 5465e4a88466..5bd17e0737b2 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -390,7 +390,7 @@ namespace {
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
- bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+ bool CanEvaluateInDifferentType(Value *V, const Type *Ty,
unsigned CastOpc, int &NumCastsRemoved);
unsigned GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign = 0);
@@ -654,30 +654,12 @@ static unsigned getOpcode(const Value *V) {
}
/// AddOne - Add one to a ConstantInt
-static ConstantInt *AddOne(ConstantInt *C) {
- APInt Val(C->getValue());
- return ConstantInt::get(++Val);
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt
-static ConstantInt *SubOne(ConstantInt *C) {
- APInt Val(C->getValue());
- return ConstantInt::get(--Val);
-}
-/// Add - Add two ConstantInts together
-static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() + C2->getValue());
-}
-/// And - Bitwise AND two ConstantInts together
-static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() & C2->getValue());
-}
-/// Subtract - Subtract one ConstantInt from another
-static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() - C2->getValue());
-}
-/// Multiply - Multiply two ConstantInts together
-static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() * C2->getValue());
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
/// MultiplyOverflows - True if the multiply cannot be expressed in an int
/// this size.
@@ -774,7 +756,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
/// SimplifyDemandedBits knows about. See if the instruction has any
/// properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
- unsigned BitWidth = cast<IntegerType>(Inst.getType())->getBitWidth();
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
@@ -830,13 +812,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
const Type *VTy = V->getType();
assert((TD || !isa<PointerType>(VTy)) &&
"SimplifyDemandedBits needs to know bit widths!");
- assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) &&
- (!isa<IntegerType>(VTy) ||
- VTy->getPrimitiveSizeInBits() == BitWidth) &&
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVector() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne \
- must have same BitWidth");
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -1089,7 +1071,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
RHSKnownZero &= LHSKnownZero;
break;
case Instruction::Trunc: {
- unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask.zext(truncBf);
RHSKnownZero.zext(truncBf);
RHSKnownOne.zext(truncBf);
@@ -1112,7 +1094,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::ZExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask.trunc(SrcBitWidth);
RHSKnownZero.trunc(SrcBitWidth);
@@ -1130,7 +1112,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
APInt InputDemandedBits = DemandedMask &
APInt::getLowBitsSet(BitWidth, SrcBitWidth);
@@ -1354,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
- if (DemandedMask.ule(RA)) // srem won't affect demanded bits
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
return I->getOperand(0);
APInt LowBits = RA - 1;
@@ -2087,7 +2069,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ if (SimplifyDemandedInstructionBits(I))
return &I;
// zext(i1) - 1 -> select i1, 0, -1
@@ -2107,7 +2089,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *XorLHS = 0;
if (isa<ConstantInt>(RHSC) &&
match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
- uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits();
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
uint32_t Size = TySizeBits / 2;
@@ -2197,7 +2179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// X*C1 + X*C2 --> X * (C1+C2)
ConstantInt *C1;
if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::CreateMul(X, Add(C1, C2));
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
}
// X + X*C --> X * (C+1)
@@ -2262,7 +2244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
- Constant *Anded = And(CRHS, C2);
+ Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
if (Anded == CRHS) {
// See if all bits from the first bit set in the Add RHS up are included
// in the mask. First, get the rightmost bit.
@@ -2290,7 +2272,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
// add (cast *A to intptrtype) B ->
- // cast (GEP (cast *A to sbyte*) B) --> intptrtype
+ // cast (GEP (cast *A to i8*) B) --> intptrtype
{
CastInst *CI = dyn_cast<CastInst>(LHS);
Value *Other = RHS;
@@ -2299,7 +2281,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Other = LHS;
}
if (CI && CI->getType()->isSized() &&
- (CI->getType()->getPrimitiveSizeInBits() ==
+ (CI->getType()->getScalarSizeInBits() ==
TD->getIntPtrType()->getPrimitiveSizeInBits())
&& isa<PointerType>(CI->getOperand(0)->getType())) {
unsigned AS =
@@ -2523,7 +2505,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
// C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::CreateSub(Subtract(CI1, CI2),
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(CI1, CI2),
Op1I->getOperand(0));
}
}
@@ -2564,7 +2546,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// X - X*C --> X * (1-C)
ConstantInt *C2 = 0;
if (dyn_castFoldableMul(Op1I, C2) == Op0) {
- Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
+ C2);
return BinaryOperator::CreateMul(Op0, CP1);
}
}
@@ -2589,7 +2572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
if (X == dyn_castFoldableMul(Op1, C2))
- return BinaryOperator::CreateMul(X, Subtract(C1, C2));
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
return 0;
}
@@ -2950,12 +2933,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
// (sdiv X, X) --> 1 (udiv X, X) --> 1
if (Op0 == Op1) {
if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+ Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
std::vector<Constant*> Elts(Ty->getNumElements(), CI);
return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
}
- ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+ Constant *CI = ConstantInt::get(I.getType(), 1);
return ReplaceInstUsesWith(I, CI);
}
@@ -2980,7 +2963,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
else
return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- Multiply(RHS, LHSRHS));
+ ConstantExpr::getMul(RHS, LHSRHS));
}
if (!RHS->isZero()) { // avoid X udiv 0
@@ -3513,7 +3496,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
Value *X = Op->getOperand(0);
Constant *Together = 0;
if (!Op->isShift())
- Together = And(AndRHS, OpRHS);
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
switch (Op->getOpcode()) {
case Instruction::Xor:
@@ -3724,7 +3707,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
switch (LHSI->getOpcode()) {
default: return 0;
case Instruction::And:
- if (And(N, Mask) == Mask) {
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) ==
@@ -3748,7 +3731,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
- && And(N, Mask)->isZero())
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
return 0;
}
@@ -3946,10 +3929,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else {
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType())) {
if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
if (CP->isAllOnesValue()) // X & <-1,-1> -> X
return ReplaceInstUsesWith(I, I.getOperand(0));
@@ -3957,7 +3939,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
}
}
-
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt& AndRHSMask = AndRHS->getValue();
APInt NotAndRHS(~AndRHSMask);
@@ -4510,7 +4492,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
Val->getName()+".off");
InsertNewInstBefore(Add, I);
- AddCST = Subtract(AddOne(RHSCst), LHSCst);
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
}
break; // (X == 13 | X == 15) -> no change
@@ -4653,18 +4635,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
- return ReplaceInstUsesWith(I, I.getOperand(1));
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType())) {
+ if (isa<ConstantAggregateZero>(Op1)) {
+ return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+ if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
+ return ReplaceInstUsesWith(I, I.getOperand(1));
+ }
}
-
-
// or X, -1 == -1
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
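
[editor's note] The restructuring in this hunk (and in visitAnd above and visitXor just below) lets SimplifyDemandedInstructionBits run on vector types as well, leaving only the genuinely vector-specific identities inside the isa<VectorType> branch. A standalone check of those identities, modeled lane-by-lane on 8-bit values (illustrative only):

#include <array>
#include <cassert>

int main() {
  std::array<unsigned char, 2> X{0x5a, 0x3c};   // a stand-in 2-lane vector
  for (unsigned char x : X) {
    assert((x & 0xff) == x);     // X & <-1,-1> -> X
    assert((x & 0x00) == 0);     // X & <0,0>  -> <0,0>
    assert((x | 0x00) == x);     // X | <0,0>  -> X
    assert((x | 0xff) == 0xff);  // X | <-1,-1> -> <-1,-1>
    assert((x ^ 0x00) == x);     // X ^ <0,0>  -> X
  }
  return 0;
}
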
@@ -4991,12 +4972,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
- }
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType()))
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
@@ -5083,7 +5063,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
- Constant *CommonBits = And(Op0CI, RHS);
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
NewRHS = ConstantExpr::getAnd(NewRHS,
ConstantExpr::getNot(CommonBits));
AddToWorkList(Op0I);
@@ -5247,12 +5227,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return Changed ? &I : 0;
}
-/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
-/// overflowed for this type.
-static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
- ConstantInt *In2, bool IsSigned = false) {
- Result = cast<ConstantInt>(Add(In1, In2));
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
if (IsSigned)
if (In2->getValue().isNegative())
return Result->getValue().sgt(In1->getValue());
@@ -5262,12 +5243,32 @@ static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
return Result->getValue().ult(In1->getValue());
}
-/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
/// overflowed for this type.
-static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
- ConstantInt *In2, bool IsSigned = false) {
- Result = cast<ConstantInt>(Subtract(In1, In2));
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
if (IsSigned)
if (In2->getValue().isNegative())
return Result->getValue().slt(In1->getValue());
@@ -5277,6 +5278,29 @@ static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
return Result->getValue().ugt(In1->getValue());
}
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
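
[editor's note] AddWithOverflow/SubWithOverflow above now take Constant* and, for vectors, apply the scalar overflow test to each lane via the new ExtractElement helper. A standalone restatement of the scalar test itself (HasAddOverflow's comparisons) on 8-bit values, assuming two's-complement wrapping:

#include <cassert>
#include <cstdint>

// After Result = In1 + In2 wraps, overflow is detected by comparing
// Result against an input, mirroring HasAddOverflow's APInt logic.
static bool hasAddOverflow(uint8_t Result, uint8_t In1, uint8_t In2,
                           bool IsSigned) {
  if (IsSigned) {
    if ((int8_t)In2 < 0)
      return (int8_t)Result > (int8_t)In1; // adding a negative must not grow
    return (int8_t)Result < (int8_t)In1;   // adding a positive must not shrink
  }
  return Result < In1;                     // unsigned: wraparound shrinks
}

int main() {
  assert(hasAddOverflow((uint8_t)(200 + 100), 200, 100, false)); // 300 wraps
  assert(!hasAddOverflow((uint8_t)(200 + 50), 200, 50, false));
  assert(hasAddOverflow((uint8_t)(120 + 10), 120, 10, true));    // past INT8_MAX
  assert(!hasAddOverflow((uint8_t)(100 - 5), 100, (uint8_t)-5, true));
  return 0;
}
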
@@ -5589,7 +5613,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// Check to see that the input is converted from an integer type that is small
// enough that it preserves all bits. TODO: check here for "known" sign bits.
// This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
- unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
// If this is a uitofp instruction, we need an extra bit to hold the sign.
bool LHSUnsigned = isa<UIToFPInst>(LHSI);
@@ -5644,7 +5668,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// See if the FP constant is too large for the integer. For example,
// comparing an i8 to 300.0.
- unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
if (!LHSUnsigned) {
// If the RHS value is > SignedMax, fold the comparison. This handles +INF
@@ -5943,9 +5967,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (TD)
- BitWidth = TD->getTypeSizeInBits(Ty);
- else if (isa<IntegerType>(Ty))
- BitWidth = Ty->getPrimitiveSizeInBits();
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+ else if (Ty->isIntOrIntVector())
+ BitWidth = Ty->getScalarSizeInBits();
bool isSignBit = false;
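
[editor's note] This hunk shows the substitution repeated throughout the patch: getPrimitiveSizeInBits() gives the whole-vector width, while getScalarSizeInBits() gives the per-lane width that bit-tracking code actually wants. A hedged sketch against the LLVM 2.5-era API this patch targets (Type::Int32Ty is a global in that API; later releases moved these behind an LLVMContext):

#include "llvm/DerivedTypes.h"
#include <cassert>
using namespace llvm;

void checkWidths() {
  const Type *Scalar = Type::Int32Ty;
  const VectorType *Vec = VectorType::get(Type::Int32Ty, 4);
  assert(Scalar->getPrimitiveSizeInBits() == 32);
  assert(Vec->getPrimitiveSizeInBits() == 128);  // whole-vector width
  assert(Scalar->getScalarSizeInBits() == 32);
  assert(Vec->getScalarSizeInBits() == 32);      // per-lane width
}
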
@@ -6459,7 +6483,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// of the form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
// C2 (CI). By solving for X we can turn this into a range check
// instead of computing a divide.
- ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
// Determine if the product overflows by seeing if the product is
// not equal to the divide. Make sure we do the same kind of divide
@@ -6478,7 +6502,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// overflow variable is set to 0 if its corresponding bound variable is valid
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
- ConstantInt *LoBound = 0, *HiBound = 0;
+ Constant *LoBound = 0, *HiBound = 0;
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
@@ -6966,7 +6990,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
if (BO->hasOneUse())
return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- Subtract(RHS, BOp1C));
+ ConstantExpr::getSub(RHS, BOp1C));
} else if (RHSV == 0) {
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
@@ -7133,10 +7157,10 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (Res2 == CI) {
// Make sure that sign of the Cmp and the sign of the Cast are the same.
// For example, we might have:
- // %A = sext short %X to uint
- // %B = icmp ugt uint %A, 1330
+ // %A = sext i16 %X to i32
+ // %B = icmp ugt i32 %A, 1330
// It is incorrect to transform this into
- // %B = icmp ugt short %X, 1330
+ // %B = icmp ugt i16 %X, 1330
// because %A may have negative value.
//
// However, we allow this when the compare is EQ/NE, because they are
@@ -7210,18 +7234,16 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
if (CSI->isAllOnesValue())
return ReplaceInstUsesWith(I, CSI);
-
+
// See if we can turn a signed shr into an unsigned shr.
- if (!isa<VectorType>(I.getType())) {
- if (MaskedValueIsZero(Op0,
- APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
- return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
- // Arithmetic shifting an all-sign-bit value is a no-op.
- unsigned NumSignBits = ComputeNumSignBits(Op0);
- if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
- return ReplaceInstUsesWith(I, Op0);
- }
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
return 0;
}
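
[editor's note] The visitAShr cleanup above also lifts two folds to vector types: ashr can become lshr whenever the sign bit is known zero, and shifting a value that is entirely sign bits is a no-op. A standalone check on 8-bit scalars (right-shifting a negative signed value is arithmetic on mainstream targets, which is what the fold models):

#include <cassert>
#include <cstdint>

int main() {
  // Sign bit known zero: ashr and lshr agree.
  int8_t a = 0x35;
  assert((a >> 3) == (int8_t)((uint8_t)a >> 3));

  // All sign bits: the arithmetic shift changes nothing.
  int8_t b = -1;                 // 8 sign bits
  assert((b >> 5) == b);
  int8_t z = 0;
  assert((z >> 5) == z);
  return 0;
}
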
@@ -7250,7 +7272,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
}
// See if we can fold away this shift.
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ if (SimplifyDemandedInstructionBits(I))
return &I;
// Try to fold constant and into select arguments.
@@ -7271,10 +7293,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits();
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
- // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr
- // of a signed value.
+ // shl i32 X, 32 = 0 and lshr i8 Y, 9 = 0, ... just don't eliminate
+ // a signed shift.
//
if (Op1->uge(TypeBits)) {
if (I.getOpcode() != Instruction::AShr)
@@ -7320,8 +7342,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// part of the register be zeros. Emulate this by inserting an AND to
// clear the top bits as needed. This 'and' will usually be zapped by
// other xforms later if dead.
- unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
- unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
// The mask we constructed says what the trunc would do if occurring
@@ -7729,7 +7751,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// If the allocation size is constant, form a constant mul expression
Amt = ConstantInt::get(Type::Int32Ty, Scale);
if (isa<ConstantInt>(NumElements))
- Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
+ Amt = ConstantExpr::getMul(cast<ConstantInt>(NumElements),
+ cast<ConstantInt>(Amt));
// otherwise multiply the amount and the number of elements
else {
Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
@@ -7788,17 +7811,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
/// be computed in a larger type, which is then and'd or sext_in_reg'd to get
/// the final result.
-bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
unsigned CastOpc,
int &NumCastsRemoved){
// We can always evaluate constants in another type.
- if (isa<ConstantInt>(V))
+ if (isa<Constant>(V))
return true;
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
- const IntegerType *OrigTy = cast<IntegerType>(V->getType());
+ const Type *OrigTy = V->getType();
// If this is an extension or truncate, we can often eliminate it.
if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -7836,8 +7859,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
// If we are truncating the result of this SHL, and if it's a shift of a
// constant amount, we can always perform a SHL in a smaller type.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t BitWidth = Ty->getBitWidth();
- if (BitWidth < OrigTy->getBitWidth() &&
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigTy->getScalarSizeInBits() &&
CI->getLimitedValue(BitWidth) < BitWidth)
return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
NumCastsRemoved);
@@ -7848,8 +7871,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
// lshr iff we know that the bits we would otherwise be shifting in are
// already zeros.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t OrigBitWidth = OrigTy->getBitWidth();
- uint32_t BitWidth = Ty->getBitWidth();
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
if (BitWidth < OrigBitWidth &&
MaskedValueIsZero(I->getOperand(0),
APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
@@ -8131,8 +8154,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
const Type *SrcTy = Src->getType();
const Type *DestTy = CI.getType();
- uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits();
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
// See if we can simplify any instructions used by the LHS whose sole
// purpose is to compute bits we don't care about.
@@ -8151,8 +8174,9 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) &&
- CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
+ (isSafeIntegerType(DestTy->getScalarType()) ||
+ !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+ CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
// If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win. If this is a zero-extension,
@@ -8350,17 +8374,18 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Src = CI.getOperand(0);
const Type *Ty = CI.getType();
- uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits();
- uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
+ uint32_t DestBitWidth = Ty->getScalarSizeInBits();
+ uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
- if (DestBitWidth == 1) {
+ if (DestBitWidth == 1 &&
+ isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) {
Constant *One = ConstantInt::get(Src->getType(), 1);
Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
-
+
// Optimize trunc(lshr(), c) to pull the shift through the truncate.
ConstantInt *ShAmtV = 0;
Value *ShiftOp = 0;
@@ -8403,7 +8428,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
+ In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
In->getName()+".lobit"),
CI);
@@ -8494,28 +8519,30 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// Get the sizes of the types involved. We know that the intermediate type
// will be smaller than A or C, but don't know the relation between A and C.
Value *A = CSrc->getOperand(0);
- unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
- unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
- unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If we're actually extending zero bits, then if
// SrcSize < DstSize: zext(a & mask)
// SrcSize == DstSize: a & mask
// SrcSize > DstSize: trunc(a) & mask
if (SrcSize < DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = ConstantInt::get(AndValue);
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
Instruction *And =
BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
InsertNewInstBefore(And, CI);
return new ZExtInst(And, CI.getType());
} else if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
} else if (SrcSize > DstSize) {
Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
InsertNewInstBefore(Trunc, CI);
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(),
+ AndValue));
}
}
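
[editor's note] The zext-of-zext hunk keeps the low MidSize bits of A and then sizes the result according to how SrcSize compares with DstSize; the fix threads A's type into ConstantInt::get so the mask constant matches its operand. The three cases restated on concrete widths (illustrative values only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xdeadbeef;       // SrcSize = 32
  uint32_t lowMask8 = 0xff;      // APInt::getLowBitsSet(32, 8), MidSize = 8

  // SrcSize < DstSize: zext(A & mask), modeling Dst as 64 bits.
  uint64_t widened = (uint64_t)(A & lowMask8);
  assert(widened == 0xef);

  // SrcSize == DstSize: A & mask.
  assert((A & lowMask8) == 0xef);

  // SrcSize > DstSize: trunc(A) & mask, modeling Dst as 16 bits.
  uint16_t truncated = (uint16_t)A;
  assert((uint16_t)(truncated & lowMask8) == 0xef);
  return 0;
}
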
@@ -8537,6 +8564,33 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
}
}
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp");
+ InsertNewInstBefore(NewAnd, *And);
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
return 0;
}
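
[editor's note] The two new zext folds above hinge on one fact: once C is zero-extended, an AND (or AND-then-XOR) with it clears every bit the truncation would have dropped, so the trunc/zext pair cancels. A standalone check at i64/i32 widths:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t t = 0x1234567890abcdefULL;
  uint32_t C = 0x00ff00ff;

  // zext(trunc(t) & C) == t & zext(C)
  uint64_t lhs = (uint64_t)((uint32_t)t & C);
  uint64_t rhs = t & (uint64_t)C;
  assert(lhs == rhs);

  // zext((trunc(t) & C) ^ C) == (t & zext(C)) ^ zext(C)
  uint64_t lhs2 = (uint64_t)(((uint32_t)t & C) ^ C);
  uint64_t rhs2 = (t & (uint64_t)C) ^ (uint64_t)C;
  assert(lhs2 == rhs2);
  return 0;
}
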
@@ -8556,9 +8610,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// eliminate the trunc/sext pair.
if (getOpcode(Src) == Instruction::Trunc) {
Value *Op = cast<User>(Src)->getOperand(0);
- unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth();
- unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth();
- unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+ unsigned OpBits = Op->getType()->getScalarSizeInBits();
+ unsigned MidBits = Src->getType()->getScalarSizeInBits();
+ unsigned DestBits = CI.getType()->getScalarSizeInBits();
unsigned NumSignBits = ComputeNumSignBits(Op);
if (OpBits == DestBits) {
@@ -8599,8 +8653,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
BA == CA && isa<TruncInst>(A)) {
Value *I = cast<TruncInst>(A)->getOperand(0);
if (I->getType() == CI.getType()) {
- unsigned MidSize = Src->getType()->getPrimitiveSizeInBits();
- unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
@@ -8671,11 +8725,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
if (LHSTrunc->getType() != SrcTy &&
RHSTrunc->getType() != SrcTy) {
- unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If the source types were both smaller than the destination type of
// the cast, do this xform.
- if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize &&
- RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) {
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
CI.getType(), CI);
RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
@@ -8706,7 +8760,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
// 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
@@ -8726,7 +8780,7 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
// 'X' value would cause an undefined result for the fptosi.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() <=
+ (int)FI.getType()->getScalarSizeInBits() <=
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
@@ -8747,7 +8801,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// trunc to be exposed to other transforms. Don't do this for extending
// ptrtoint's, because we don't know if the target sign or zero extends its
// pointers.
- if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0),
TD->getIntPtrType(),
"tmp"), CI);
@@ -8763,7 +8817,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// allows the trunc to be exposed to other transforms. Don't do this for
// extending inttoptr's, because we don't know if the target sign or zero
// extends to pointers.
- if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() >
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits()) {
Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0),
TD->getIntPtrType(),
@@ -9194,7 +9248,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
(Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
+ In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
In->getName()+".lobit"),
*ICI);
@@ -9316,7 +9370,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// The comparison constant and the result are not necessarily the
// same width. Make an all-ones value by inserting an AShr.
Value *X = IC->getOperand(0);
- uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
+ uint32_t Bits = X->getType()->getScalarSizeInBits();
Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
ShAmt, "ones");
@@ -10850,8 +10904,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
Instruction *InsertPoint,
InstCombiner *IC) {
- unsigned PtrSize = DTy->getPrimitiveSizeInBits();
- unsigned VTySize = V->getType()->getPrimitiveSizeInBits();
+ unsigned PtrSize = DTy->getScalarSizeInBits();
+ unsigned VTySize = V->getType()->getScalarSizeInBits();
// We must cast correctly to the pointer type. Ensure that we
// sign extend the integer value if it is smaller as this is
// used for address computation.
@@ -10892,7 +10946,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
const Type *SrcTy = CI->getOperand(0)->getType();
// We can eliminate a cast from i32 to i64 iff the target
// is a 32-bit pointer target.
- if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
+ if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) {
MadeChange = true;
*i = CI->getOperand(0);
}
@@ -11105,7 +11159,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
ConstantInt *Scale = 0;
if (ArrayEltSize == 1) {
NewIdx = GEP.getOperand(1);
- Scale = ConstantInt::get(NewIdx->getType(), 1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
NewIdx = ConstantInt::get(CI->getType(), 1);
Scale = CI;
@@ -11114,7 +11168,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
isa<ConstantInt>(Inst->getOperand(1))) {
ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
- Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
NewIdx = Inst->getOperand(0);
} else if (Inst->getOpcode() == Instruction::Mul &&
isa<ConstantInt>(Inst->getOperand(1))) {
@@ -11390,45 +11445,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
return 0;
}
-/// isSafeToLoadUnconditionally - Return true if we know that executing a load
-/// from this value cannot trap. If it is not obviously safe to load from the
-/// specified pointer, we do a quick local scan of the basic block containing
-/// ScanFrom, to determine if the address is already accessed.
-static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
- // If it is an alloca it is always safe to load from.
- if (isa<AllocaInst>(V)) return true;
-
- // If it is a global variable it is mostly safe to load from.
- if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
- // Don't try to evaluate aliases. External weak GV can be null.
- return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
-
- // Otherwise, be a little bit agressive by scanning the local block where we
- // want to check to see if the pointer is already being loaded or stored
- // from/to. If so, the previous load or store would have already trapped,
- // so there is no harm doing an extra load (also, CSE will later eliminate
- // the load entirely).
- BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
-
- while (BBI != E) {
- --BBI;
-
- // If we see a free or a call (which might do a free) the pointer could be
- // marked invalid.
- if (isa<FreeInst>(BBI) ||
- (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)))
- return false;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI->getOperand(0) == V) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (SI->getOperand(1) == V) return true;
- }
-
- }
- return false;
-}
-
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index c0ca2df1ce11..5a70fc3bc6f7 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -76,7 +76,7 @@ namespace {
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
unsigned JumpThreadCost);
- BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+ BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
@@ -163,10 +163,10 @@ void JumpThreading::FindLoopHeaders(Function &F) {
/// This is important for things like "phi i1 [true, true, false, true, x]"
/// where we only need to clone the block for the true blocks once.
///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
SmallVector<BasicBlock*, 16> CommonPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == CstVal)
+ if (PN->getIncomingValue(i) == Val)
CommonPreds.push_back(PN->getIncomingBlock(i));
if (CommonPreds.size() == 1)
@@ -324,10 +324,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
- // If there is only a single predecessor of this block, nothing to fold.
- if (BB->getSinglePredecessor())
- return false;
-
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0)
return false;
@@ -346,13 +342,36 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondInst->getOpcode() == Instruction::And))
return true;
- // If we have "br (phi != 42)" and the phi node has any constant values as
- // operands, we can thread through this block.
- if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst))
- if (isa<PHINode>(CondCmp->getOperand(0)) &&
- isa<Constant>(CondCmp->getOperand(1)) &&
- ProcessBranchOnCompare(CondCmp, BB))
- return true;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ if (isa<PHINode>(CondCmp->getOperand(0))) {
+ // If we have "br (phi != 42)" and the phi node has any constant values
+ // as operands, we can thread through this block.
+ //
+ // If we have "br (cmp phi, x)" and the phi node contains x such that the
+ // comparison uniquely identifies the branch target, we can thread
+ // through this block.
+
+ if (ProcessBranchOnCompare(CondCmp, BB))
+ return true;
+ }
+
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with the same value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && *PI != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ }
+ }
+ }
+ }
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
@@ -770,12 +789,30 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
}
+/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
+/// hand sides of the compare instruction, try to determine the result. If the
+/// result cannot be determined, a null pointer is returned.
+static Constant *GetResultOfComparison(CmpInst::Predicate pred,
+ Value *LHS, Value *RHS) {
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::getCompare(pred, CLHS, CRHS);
+
+ if (LHS == RHS)
+ if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
+ return ICmpInst::isTrueWhenEqual(pred) ?
+ ConstantInt::getTrue() : ConstantInt::getFalse();
+
+ return 0;
+}
+
/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a constant. If the PHI node contains any constants as inputs, we
-/// can fold the compare for that edge and thread through it.
+/// node and a value. If we can identify when the comparison is true between
+/// the phi inputs and the value, we can fold the compare for that edge and
+/// thread through it.
bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
- Constant *RHS = cast<Constant>(Cmp->getOperand(1));
+ Value *RHS = Cmp->getOperand(1);
// If the phi isn't in the current block, an incoming edge to this block
// doesn't control the destination.
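
[editor's note] GetResultOfComparison generalizes what ProcessBranchOnCompare used to inline: constant-vs-constant compares fold outright, and X-vs-X compares now resolve from the predicate alone. A standalone model of that second, new case (the enum stands in for CmpInst::Predicate):

#include <cassert>

enum Pred { EQ, NE, ULT, ULE };

static bool isTrueWhenEqual(Pred p) { return p == EQ || p == ULE; }

// Models: if (LHS == RHS) return isTrueWhenEqual(pred) ? true : false.
static bool foldSameOperand(Pred p) { return isTrueWhenEqual(p); }

int main() {
  assert(foldSameOperand(EQ));    // x == x  is always true
  assert(!foldSameOperand(NE));   // x != x  is always false
  assert(!foldSameOperand(ULT));  // x <u x  is always false
  assert(foldSameOperand(ULE));   // x <=u x is always true
  return 0;
}
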
@@ -784,18 +821,17 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
- Constant *PredCst = 0;
+ Value *PredVal = 0;
bool TrueDirection = false;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- PredCst = dyn_cast<Constant>(PN->getIncomingValue(i));
- if (PredCst == 0) continue;
+ PredVal = PN->getIncomingValue(i);
+
+ Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, RHS);
+ if (!Res) {
+ PredVal = 0;
+ continue;
+ }
- Constant *Res;
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp))
- Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS);
- else
- Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(),
- PredCst, RHS);
// If this folded to a constant expr, we can't do anything.
if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
TrueDirection = ResC->getZExtValue();
@@ -808,11 +844,11 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
}
// Otherwise, we can't fold this input.
- PredCst = 0;
+ PredVal = 0;
}
// If no match, bail out.
- if (PredCst == 0)
+ if (PredVal == 0)
return false;
// See if the cost of duplicating this block is low enough.
@@ -825,7 +861,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
// If so, we can actually do this threading. Merge any common predecessors
// that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
// Next, get our successor.
BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 9c785968e1d4..6f7a7f866a8e 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -290,13 +290,13 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) {
// Return V+1
static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) {
- ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
}
// Return V-1
static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) {
- ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 944f40931910..7579748bbc0a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -143,10 +143,10 @@ namespace {
/// inside the loop then try to eliminate the cast operation.
void OptimizeShadowIV(Loop *L);
- /// OptimizeSMax - Rewrite the loop's terminating condition
- /// if it uses an smax computation.
- ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse);
+ /// OptimizeMax - Rewrite the loop's terminating condition
+ /// if it uses a max computation.
+ ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse);
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride);
@@ -336,13 +336,6 @@ namespace {
/// EmittedBase.
Value *OperandValToReplace;
- /// isSigned - The stride (and thus also the Base) of this use may be in
- /// a narrower type than the use itself (OperandValToReplace->getType()).
- /// When this is the case, the isSigned field indicates whether the
- /// IV expression should be signed-extended instead of zero-extended to
- /// fit the type of the use.
- bool isSigned;
-
/// Imm - The immediate value that should be added to the base immediately
/// before Inst, because it will be folded into the imm field of the
/// instruction. This is also sometimes used for loop-variant values that
@@ -363,7 +356,6 @@ namespace {
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- isSigned(IVSU.isSigned()),
Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
@@ -428,11 +420,6 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
}
- if (isSigned)
- NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty);
- else
- NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty);
-
return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
}
@@ -592,7 +579,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
if (Val->isLoopInvariant(L)) return; // Nothing to do.
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
@@ -613,7 +600,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
SCEVHandle Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
} else {
@@ -633,7 +620,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
@@ -660,7 +647,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
if (Start != SARE->getStart()) {
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
}
@@ -717,7 +704,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together. This is used to reassociate common addition subexprs
/// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
+static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs,
SCEVHandle Expr,
ScalarEvolution *SE) {
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
@@ -729,7 +716,7 @@ static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
SubExprs.push_back(Expr);
} else {
// Compute the addrec with zero as its base.
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
@@ -783,9 +770,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
- std::vector<SCEVHandle> UniqueSubExprs;
+ SmallVector<SCEVHandle, 16> UniqueSubExprs;
- std::vector<SCEVHandle> SubExprs;
+ SmallVector<SCEVHandle, 16> SubExprs;
unsigned NumUsesInsideLoop = 0;
for (unsigned i = 0; i != NumUses; ++i) {
// If the user is outside the loop, just ignore it for base computation.
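
[editor's note] The std::vector<SCEVHandle> → SmallVector swaps in this file trade heap allocation for inline storage on operand lists that are almost always short. A minimal usage sketch (plain ints standing in for SCEVHandle):

#include "llvm/ADT/SmallVector.h"
using namespace llvm;

int main() {
  SmallVector<int, 4> Ops;   // first 4 elements live inline, on the stack
  for (int i = 0; i != 4; ++i)
    Ops.push_back(i);        // no heap allocation yet
  Ops.push_back(4);          // the 5th element spills to the heap
  return Ops.size() == 5 ? 0 : 1;
}
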
@@ -1129,11 +1116,11 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) {
return SC->getValue()->getValue().isNegative();
}
-// CollectIVUsers - Transform our list of users and offsets to a bit more
-// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-// of the strided accesses, as well as the old information from Uses. We
-// progressively move information from the Base field to the Imm field, until
-// we eventually have the full access expression to rewrite the use.
+/// CollectIVUsers - Transform our list of users and offsets to a bit more
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
+/// of the strided accesses, as well as the old information from Uses. We
+/// progressively move information from the Base field to the Imm field, until
+/// we eventually have the full access expression to rewrite the use.
SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
@@ -2008,15 +1995,15 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
- ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
+ Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->getOffset(),
- SE->getConstant(ConstantInt::get(CmpTy, Scale)))
- : SE->getConstant(ConstantInt::get(NewCmpIntTy,
+ SE->getConstant(CmpTy, Scale))
+ : SE->getConstant(NewCmpIntTy,
cast<SCEVConstant>(CondUse->getOffset())->getValue()
- ->getSExtValue()*Scale));
+ ->getSExtValue()*Scale);
break;
}
}
@@ -2047,7 +2034,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false);
+ IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
@@ -2057,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
return Cond;
}
-/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
-/// an smax computation.
+/// OptimizeMax - Rewrite the loop's terminating condition if it uses
+/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
@@ -2068,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// p[i] = 0.0;
/// } while (++i < n);
///
-/// where the comparison is signed, the trip count isn't just 'n', because
-/// 'n' could be negative. And unfortunately this can come up even for loops
-/// where the user didn't use a C do-while loop. For example, seemingly
-/// well-behaved top-test loops will commonly be lowered like this:
+/// the trip count isn't just 'n', because 'n' might not be positive. And
+/// unfortunately this can come up even for loops where the user didn't use
+/// a C do-while loop. For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
@@ -2084,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
-/// signed-max expression, which allows it to give the loop a canonical
+/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
-/// smax = n < 1 ? 1 : n;
+/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
-/// } while (++i != smax);
+/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
@@ -2107,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
-ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse) {
+ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
@@ -2126,12 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
- const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
- if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
+ if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
+ return Cond;
+ const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
+ if (Max != SE->getSCEV(Sel)) return Cond;
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
- SCEVHandle SMaxLHS = SMax->getOperand(0);
- SCEVHandle SMaxRHS = SMax->getOperand(1);
- if (!SMaxLHS || SMaxLHS != One) return Cond;
+ SCEVHandle MaxLHS = Max->getOperand(0);
+ SCEVHandle MaxRHS = Max->getOperand(1);
+ if (!MaxLHS || MaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
@@ -2148,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
- if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
+ if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
- else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
+ else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ CmpInst::Predicate Pred =
+ isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
// Ok, everything looks ok to change the condition into an SLT/ULT or SGE/UGE
// delete the max calculation.
ICmpInst *NewCond =
- new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
- CmpInst::ICMP_SLT :
- CmpInst::ICMP_SGE,
- Cond->getOperand(0), NewRHS, "scmp", Cond);
+ new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
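
[editor's note] The predicate selection added above is the core of generalizing OptimizeSMax to OptimizeMax: smax pairs with a signed compare, umax with an unsigned one, and an EQ exit test takes the inverse. A standalone model of the mapping (the enum stands in for CmpInst::Predicate):

#include <cassert>

enum Pred { SLT, SGE, ULT, UGE };

static Pred invert(Pred p) {
  switch (p) {
  case SLT: return SGE;
  case SGE: return SLT;
  case ULT: return UGE;
  default:  return ULT;
  }
}

static Pred choosePred(bool IsSignedMax, bool CondIsEQ) {
  Pred P = IsSignedMax ? SLT : ULT;
  return CondIsEQ ? invert(P) : P;
}

int main() {
  assert(choosePred(true, false) == SLT);   // smax + NE -> SLT
  assert(choosePred(true, true) == SGE);    // smax + EQ -> SGE
  assert(choosePred(false, false) == ULT);  // umax + NE -> ULT
  assert(choosePred(false, true) == UGE);   // umax + EQ -> UGE
  return 0;
}
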
@@ -2242,7 +2240,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
@@ -2266,7 +2264,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
- ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
@@ -2284,9 +2282,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
}
}
-// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
-// uses in the loop, look to see if we can eliminate some, in favor of using
-// common indvars for the different uses.
+/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
+/// uses in the loop, look to see if we can eliminate some, in favor of using
+/// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
@@ -2301,11 +2299,11 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitBlock = L->getExitingBlock();
- if (!ExitBlock)
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ if (!ExitingBlock)
// Multiple exits, just look at the exit in the latch block if there is one.
- ExitBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator());
+ ExitingBlock = LatchBlock;
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
return;
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
@@ -2318,7 +2316,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
- if (ExitBlock != LatchBlock) {
+ if (ExitingBlock != LatchBlock) {
if (!Cond->hasOneUse())
// See below, we don't want the condition to be cloned.
return;
@@ -2373,14 +2371,14 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
StrideNoReuse.insert(*CondStride);
}
- // If the trip count is computed in terms of an smax (due to ScalarEvolution
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT instead of NE.
- Cond = OptimizeSMax(L, Cond, CondUse);
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
- if (ExitBlock == LatchBlock)
+ if (ExitingBlock == LatchBlock)
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
// It's possible for the setcc instruction to be anywhere in the loop, and
@@ -2397,8 +2395,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// Clone the IVUse, as the old use still exists!
IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace(),
- false);
+ CondUse->getOperandValToReplace());
CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
}
}
@@ -2413,9 +2410,9 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
++NumLoopCond;
}
-// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-// when to exit the loop is used only for that purpose, try to rearrange things
-// so it counts down to a test against zero.
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
// If the number of times the loop is executed isn't computable, give up.
@@ -2506,7 +2503,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
Value *startVal = phi->getIncomingValue(inBlock);
Value *endVal = Cond->getOperand(1);
// FIXME check for case where both are constant
- ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
+ Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
BinaryOperator *NewStartVal =
BinaryOperator::Create(Instruction::Sub, endVal, startVal,
"tmp", PreInsertPt);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 7143c7be485e..d89790c29217 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -820,10 +820,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// If EltTy is a vector type, get the element type.
- const Type *ValTy = EltTy;
- if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
- ValTy = VTy->getElementType();
-
+ const Type *ValTy = EltTy->getScalarType();
+
// Construct an integer with the right value.
unsigned EltSize = TD->getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 59989c92d740..bbcb79255eef 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -135,7 +135,11 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
TD->getIntPtrType(),
PointerType::getUnqual(Type::Int8Ty),
NULL);
- return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
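
[editor's note] Each emitter in this file now copies the callee's calling convention onto the call it creates; getOrInsertFunction can return a bitcast of a pre-existing declaration with a different prototype, so the Function is recovered through stripPointerCasts first. The repeated pattern, factored into a hypothetical helper (a sketch against the same-era API, not part of the patch):

#include "llvm/Function.h"
#include "llvm/Instructions.h"
using namespace llvm;

// Hypothetical helper: copy the callee's calling convention onto a
// freshly emitted call, as each Emit* routine below does inline.
static CallInst *setConvFromCallee(CallInst *CI, Value *Callee) {
  if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
    CI->setCallingConv(F->getCallingConv());
  return CI;
}
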
@@ -164,7 +168,12 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
PointerType::getUnqual(Type::Int8Ty),
Type::Int32Ty, TD->getIntPtrType(),
NULL);
- return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemCmp - Emit a call to the memcmp function.
@@ -182,8 +191,13 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
PointerType::getUnqual(Type::Int8Ty),
PointerType::getUnqual(Type::Int8Ty),
TD->getIntPtrType(), NULL);
- return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
- Len, "memcmp");
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemSet - Emit a call to the memset function
@@ -217,20 +231,30 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
NameBuffer[NameLen+1] = 0;
Name = NameBuffer;
}
-
+
Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), NULL);
- return B.CreateCall(Callee, Op, Name);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
- Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty,
- Type::Int32Ty, NULL);
- B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar");
+ Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty,
+ Type::Int32Ty, NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char, Type::Int32Ty, "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
}
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
@@ -241,10 +265,14 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
- Type::Int32Ty,
- PointerType::getUnqual(Type::Int8Ty), NULL);
- B.CreateCall(F, CastToCStr(Str, B), "puts");
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
}
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
@@ -258,12 +286,14 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
if (isa<PointerType>(File->getType()))
F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty,
Type::Int32Ty, File->getType(), NULL);
-
else
F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
File->getType(), NULL);
Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
- B.CreateCall2(F, Char, File, "fputc");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
/// EmitFPutS - Emit a call to the puts function. Str is required to be a
@@ -283,7 +313,10 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
F = M->getOrInsertFunction("fputs", Type::Int32Ty,
PointerType::getUnqual(Type::Int8Ty),
File->getType(), NULL);
- B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
@@ -307,8 +340,11 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
PointerType::getUnqual(Type::Int8Ty),
TD->getIntPtrType(), TD->getIntPtrType(),
File->getType(), NULL);
- B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(), 1), File);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
//===----------------------------------------------------------------------===//
@@ -673,12 +709,10 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 || Len2) {
- // Choose the smallest Len excluding 0 which means 'unknown'.
- if (!Len1 || (Len2 && Len2 < Len1))
- Len1 = Len2;
+ if (Len1 && Len2) {
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(), Len1), B);
+ ConstantInt::get(TD->getIntPtrType(),
+ std::min(Len1, Len2)), B);
}
return 0;
@@ -1039,7 +1073,7 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
}
-
+
if (LdExpArg) {
const char *Name;
if (Op->getType() == Type::FloatTy)
@@ -1056,12 +1090,15 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
Module *M = Caller->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), Type::Int32Ty,NULL);
- return B.CreateCall2(Callee, One, LdExpArg);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
return 0;
}
};
-
//===---------------------------------------===//
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
@@ -1072,7 +1109,7 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
FT->getParamType(0) != Type::DoubleTy)
return 0;
-
+
// If this is something like 'floor((double)floatval)', convert to floorf.
FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
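Every libcall emitter touched above follows the same recipe: build the CallInst, then copy the callee's calling convention onto it, stripping pointer casts first because getOrInsertFunction can hand back a bitcast of an existing declaration with a mismatched type. A minimal sketch of that recipe, assuming the same 2009-era IRBuilder API the file already uses (EmitLibCall and the "libcall" name are illustrative):

    static Value *EmitLibCall(Value *Callee, Value *Arg, IRBuilder<> &B) {
      CallInst *CI = B.CreateCall(Callee, Arg, "libcall");
      // getOrInsertFunction may return a constant bitcast of an existing
      // declaration, so look through casts before reading the convention.
      if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
        CI->setCallingConv(F->getCallingConv());
      return CI;
    }

Without this, a libcall emitted into a module whose runtime uses a non-C convention (x86_stdcall, or the ARM conventions handled later in this patch) would be called with the wrong ABI.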
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 682d069923e4..34ee57c9b9dc 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,6 +52,7 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -201,8 +202,21 @@ bool TailCallElim::runOnFunction(Function &F) {
bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// FIXME: We can move load/store/call/free instructions above the call if the
// call does not mod/ref the memory location being processed.
- if (I->mayHaveSideEffects() || isa<LoadInst>(I))
+ if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
+
+  if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+    // Loads can always be moved above a call that has no side effects.
+ if (CI->mayHaveSideEffects()) {
+ // Non-volatile loads may be moved above a call with side effects if it
+ // does not write to memory and the load provably won't trap.
+ // FIXME: Writes to memory only matter if they may alias the pointer
+ // being loaded from.
+ if (CI->mayWriteToMemory() ||
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L))
+ return false;
+ }
+ }
// Otherwise, if this is a side-effect free instruction, check to make sure
// that it does not use the return value of the call. If it doesn't use the
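A hedged illustration of what the relaxed check buys (the function is made up, and whether the transform fires still depends on how the frontend orders the load relative to the recursive call):

    int sum(const int *p, int n) {
      if (n == 0)
        return 0;
      // If the load of *p is emitted after the recursive call, it must be
      // hoisted above the call before the accumulator transform can turn
      // the recursion into a loop. The new check allows that hoist when
      // sum() cannot write memory and the load provably cannot trap.
      return sum(p + 1, n - 1) + *p;
    }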
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 94483b816e3b..c7fff548bca7 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
@@ -28,6 +29,50 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// Local analysis.
+//
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
+ // If it is an alloca it is always safe to load from.
+ if (isa<AllocaInst>(V)) return true;
+
+ // If it is a global variable it is mostly safe to load from.
+ if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
+
+  // Otherwise, be a little bit aggressive by scanning the local block where we
+ // want to check to see if the pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free) the pointer could be marked invalid.
+ if (isa<FreeInst>(BBI) ||
+ (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI)))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->getOperand(0) == V) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->getOperand(1) == V) return true;
+ }
+ }
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
// Local constant propagation.
//
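A sketch of the intended use (hoistToEntry is a hypothetical motion utility; the real client added in this patch is TailCallElim::CanMoveAboveCall above):

    // Speculate a load to an earlier, unconditional point only when the
    // helper proves the access cannot trap: an alloca, a non-weak and
    // non-alias global, or a pointer already loaded/stored in this block.
    static bool trySpeculate(LoadInst *L) {
      if (!isSafeToLoadUnconditionally(L->getPointerOperand(), L))
        return false;
      hoistToEntry(L);  // hypothetical
      return true;
    }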
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index 32498958e34f..9af47f56ef3d 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -112,7 +112,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
const Type *AllocTy = MI->getType()->getElementType();
- // malloc(type) becomes sbyte *malloc(size)
+ // malloc(type) becomes i8 *malloc(size)
Value *MallocArg;
if (LowerMallocArgToInteger)
MallocArg = ConstantInt::get(Type::Int64Ty,
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index bcc6b819013b..ee0f6a65de4e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -859,6 +859,26 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
return Changed;
}
+// isSafeToHoistInvoke - If we would need to insert a select that uses the
+// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+// would need to do this), we can't hoist the invoke, as there is nowhere
+// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
@@ -879,8 +899,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I1 = BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
I2 = BB2_Itr++;
- if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
- isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2))
+ if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
+ !I1->isIdenticalTo(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
// If we get here, we can hoist at least one instruction.
@@ -911,6 +932,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
return true;
HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
BIParent->getInstList().insert(BI, NT);
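Why invokes need the extra guard: when both blocks end in identical terminators whose results reach a phi through different incoming entries, hoisting requires a select over those results, and an invoke terminates its block, so there is no point after it at which to insert that select. A C++-level analogue of the blocked shape (f stands in for a call that would be an invoke in IR):

    int f();  // stand-in for an invoke, identical in both arms

    int merge(bool c) {
      int v;
      if (c) v = f();  // BB1: %v1 = invoke ...
      else   v = f();  // BB2: %v2 = invoke ..., identical instruction
      // The phi of %v1/%v2 would have to become a select over the hoisted
      // invoke's own result, which has nowhere to live in its block.
      return v;
    }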
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 6b369b680d72..73b1ed656db3 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1384,7 +1384,10 @@ void AssemblyWriter::printFunction(const Function *F) {
case CallingConv::Fast: Out << "fastcc "; break;
case CallingConv::Cold: Out << "coldcc "; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break;
- case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+ case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+ case CallingConv::ARM_APCS: Out << "arm_apcscc "; break;
+ case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break;
+    case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc "; break;
default: Out << "cc" << F->getCallingConv() << " "; break;
}
@@ -1640,7 +1643,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::Fast: Out << " fastcc"; break;
case CallingConv::Cold: Out << " coldcc"; break;
case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
- case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+    case CallingConv::ARM_APCS:     Out << " arm_apcscc"; break;
+    case CallingConv::ARM_AAPCS:    Out << " arm_aapcscc"; break;
+    case CallingConv::ARM_AAPCS_VFP: Out << " arm_aapcs_vfpcc"; break;
default: Out << " cc" << CI->getCallingConv(); break;
}
@@ -1688,6 +1694,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::Cold: Out << " coldcc"; break;
case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+      case CallingConv::ARM_APCS:     Out << " arm_apcscc"; break;
+      case CallingConv::ARM_AAPCS:    Out << " arm_aapcscc"; break;
+      case CallingConv::ARM_AAPCS_VFP: Out << " arm_aapcs_vfpcc"; break;
default: Out << " cc" << II->getCallingConv(); break;
}
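With the three ARM conventions spelled out, IR tagged with them now round-trips through the printer symbolically instead of as an opaque numeric cc tag. A small sketch, assuming the CallingConv::ARM_* enumerators added elsewhere in this patch:

    #include "llvm/Function.h"
    #include "llvm/CallingConv.h"
    using namespace llvm;

    // AsmWriter now prints "define arm_aapcs_vfpcc ..." for F rather
    // than a raw "cc<N>" number.
    void markVFP(Function *F) {
      F->setCallingConv(CallingConv::ARM_AAPCS_VFP);
    }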
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 1d293ccbd44d..3aab0cce37e4 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -208,6 +208,22 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
}
}
+ // If the cast operand is a constant vector, perform the cast by
+ // operating on each element. In the cast of bitcasts, the element
+ // count may be mismatched; don't attempt to handle that here.
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ if (isa<VectorType>(DestTy) &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ CV->getType()->getNumElements()) {
+ std::vector<Constant*> res;
+ const VectorType *DestVecTy = cast<VectorType>(DestTy);
+ const Type *DstEltTy = DestVecTy->getElementType();
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ res.push_back(ConstantExpr::getCast(opc,
+ CV->getOperand(i), DstEltTy));
+ return ConstantVector::get(DestVecTy, res);
+ }
+
// We actually have to do a cast now. Perform the cast according to the
// opcode specified.
switch (opc) {
@@ -237,14 +253,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
APInt Val(DestBitWidth, 2, x);
return ConstantInt::get(Val);
}
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
- std::vector<Constant*> res;
- const VectorType *DestVecTy = cast<VectorType>(DestTy);
- const Type *DstEltTy = DestVecTy->getElementType();
- for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy));
- return ConstantVector::get(DestVecTy, res);
- }
return 0; // Can't fold.
case Instruction::IntToPtr: //always treated as unsigned
if (V->isNullValue()) // Is it an integral null value?
@@ -266,14 +274,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
APFloat::rmNearestTiesToEven);
return ConstantFP::get(apf);
}
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
- std::vector<Constant*> res;
- const VectorType *DestVecTy = cast<VectorType>(DestTy);
- const Type *DstEltTy = DestVecTy->getElementType();
- for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy));
- return ConstantVector::get(DestVecTy, res);
- }
return 0;
case Instruction::ZExt:
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -629,7 +629,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
- // Handle simplifications of the RHS when a constant int.
+ // Handle simplifications when the RHS is a constant int.
if (const ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
switch (Opcode) {
case Instruction::Add:
@@ -773,6 +773,20 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
}
+
+ switch (Opcode) {
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ if (CI1->equalsInt(0)) return const_cast<Constant*>(C1);
+ break;
+ default:
+ break;
+ }
} else if (const ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
if (const ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
APFloat C1V = CFP1->getValueAPF();
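Two separate improvements above: the element-wise folding of casts on constant vectors is hoisted ahead of the opcode switch so it applies to every cast opcode rather than being repeated per case, and a zero left operand now folds for the division, remainder, and shift operators. The latter is sound even when the right-hand side might be zero, since 0 udiv 0 and friends are undefined anyway, so refining them to 0 is permitted. A sketch of the vector path, assuming the era Constant APIs used throughout this patch (foldVectorZExt is illustrative):

    // <2 x i8> <7, 7> zext to <2 x i16> now folds lane-by-lane through
    // the generic path. Requires llvm/Constants.h and llvm/DerivedTypes.h.
    Constant *foldVectorZExt() {
      std::vector<Constant*> Elts(2, ConstantInt::get(Type::Int8Ty, 7));
      Constant *CV = ConstantVector::get(Elts);
      return ConstantExpr::getZExt(CV, VectorType::get(Type::Int16Ty, 2));
    }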
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 69c503dff956..c164a3b0c2ca 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -25,6 +25,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
@@ -35,6 +38,9 @@ using namespace llvm;
// Constant Class
//===----------------------------------------------------------------------===//
+// Becomes a no-op when multithreading is disabled.
+ManagedStatic<sys::SmartRWMutex<true> > ConstantsLock;
+
void Constant::destroyConstantImpl() {
// When a Constant is destroyed, there may be lingering
// references to the constant by other constants in the constant pool. These
@@ -269,9 +275,20 @@ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
DenseMapAPIntKeyInfo> IntMapTy;
static ManagedStatic<IntMapTy> IntConstants;
-ConstantInt *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
- const IntegerType *ITy = cast<IntegerType>(Ty);
- return get(APInt(ITy->getBitWidth(), V, isSigned));
+ConstantInt *ConstantInt::get(const IntegerType *Ty,
+ uint64_t V, bool isSigned) {
+ return get(APInt(Ty->getBitWidth(), V, isSigned));
+}
+
+Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
+ Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return
+ ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
}
// Get a ConstantInt from an APInt. Note that the value stored in the DenseMap
@@ -284,12 +301,35 @@ ConstantInt *ConstantInt::get(const APInt& V) {
const IntegerType *ITy = IntegerType::get(V.getBitWidth());
// get an existing value or the insertion position
DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
+
+ ConstantsLock->reader_acquire();
ConstantInt *&Slot = (*IntConstants)[Key];
- // if it exists, return it.
- if (Slot)
+ ConstantsLock->reader_release();
+
+ if (!Slot) {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ ConstantInt *&NewSlot = (*IntConstants)[Key];
+    if (!NewSlot) {
+ NewSlot = new ConstantInt(ITy, V);
+ }
+
+ return NewSlot;
+ } else {
return Slot;
- // otherwise create a new one, insert it, and return it.
- return Slot = new ConstantInt(ITy, V);
+ }
+}
+
+Constant *ConstantInt::get(const Type *Ty, const APInt &V) {
+ ConstantInt *C = ConstantInt::get(V);
+ assert(C->getType() == Ty->getScalarType() &&
+ "ConstantInt type doesn't match the type implied by its value!");
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return
+ ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
}
//===----------------------------------------------------------------------===//
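The uniquing tables now use a read-mostly, double-checked pattern: probe under the reader lock, and only on a miss take the writer lock and probe again before constructing, since another thread may have inserted in the window between the two locks. The shape, with ConstantFoo, FooKey, lookup(), and insertNew() as hypothetical stand-ins for the DenseMap types and operations:

    ConstantFoo *getOrCreateFoo(const FooKey &Key) {
      ConstantsLock->reader_acquire();
      ConstantFoo *Existing = lookup(Key);   // cheap read-side probe
      ConstantsLock->reader_release();
      if (Existing)
        return Existing;
      sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
      if (ConstantFoo *Raced = lookup(Key))  // re-check under the writer
        return Raced;
      return insertNew(Key);                 // create while exclusive
    }

The second lookup is what makes the pattern correct; creating unconditionally after the reader-side miss would produce duplicate constants under contention.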
@@ -368,34 +408,54 @@ static ManagedStatic<FPMapTy> FPConstants;
ConstantFP *ConstantFP::get(const APFloat &V) {
DenseMapAPFloatKeyInfo::KeyTy Key(V);
- ConstantFP *&Slot = (*FPConstants)[Key];
- if (Slot) return Slot;
- const Type *Ty;
- if (&V.getSemantics() == &APFloat::IEEEsingle)
- Ty = Type::FloatTy;
- else if (&V.getSemantics() == &APFloat::IEEEdouble)
- Ty = Type::DoubleTy;
- else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
- Ty = Type::X86_FP80Ty;
- else if (&V.getSemantics() == &APFloat::IEEEquad)
- Ty = Type::FP128Ty;
- else {
- assert(&V.getSemantics() == &APFloat::PPCDoubleDouble&&"Unknown FP format");
- Ty = Type::PPC_FP128Ty;
+ ConstantsLock->reader_acquire();
+ ConstantFP *&Slot = (*FPConstants)[Key];
+ ConstantsLock->reader_release();
+
+ if (!Slot) {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ ConstantFP *&NewSlot = (*FPConstants)[Key];
+ if (!NewSlot) {
+ const Type *Ty;
+ if (&V.getSemantics() == &APFloat::IEEEsingle)
+ Ty = Type::FloatTy;
+ else if (&V.getSemantics() == &APFloat::IEEEdouble)
+ Ty = Type::DoubleTy;
+ else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
+ Ty = Type::X86_FP80Ty;
+ else if (&V.getSemantics() == &APFloat::IEEEquad)
+ Ty = Type::FP128Ty;
+ else {
+ assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
+ "Unknown FP format");
+ Ty = Type::PPC_FP128Ty;
+ }
+ NewSlot = new ConstantFP(Ty, V);
+ }
+
+ return NewSlot;
}
- return Slot = new ConstantFP(Ty, V);
+ return Slot;
}
/// get() - This returns a constant fp for the specified value in the
/// specified type. This should only be used for simple constant values like
/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-ConstantFP *ConstantFP::get(const Type *Ty, double V) {
+Constant *ConstantFP::get(const Type *Ty, double V) {
APFloat FV(V);
bool ignored;
- FV.convert(*TypeToFloatSemantics(Ty), APFloat::rmNearestTiesToEven, &ignored);
- return get(FV);
+ FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
+ APFloat::rmNearestTiesToEven, &ignored);
+ Constant *C = get(FV);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return
+ ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
}
//===----------------------------------------------------------------------===//
@@ -1093,8 +1153,13 @@ namespace llvm {
/// AbstractTypeMap - Map for abstract type constants.
///
AbstractTypeMapTy AbstractTypeMap;
+
+ /// ValueMapLock - Mutex for this map.
+ sys::SmartMutex<true> ValueMapLock;
public:
+ // NOTE: This function is not locked. It is the caller's responsibility
+ // to enforce proper synchronization.
typename MapTy::iterator map_end() { return Map.end(); }
/// InsertOrGetItem - Return an iterator for the specified element.
@@ -1102,6 +1167,8 @@ namespace llvm {
/// entry and Exists=true. If not, the iterator points to the newly
/// inserted entry and returns Exists=false. Newly inserted entries have
/// I->second == 0, and should be filled in.
+ /// NOTE: This function is not locked. It is the caller's responsibility
+    /// to enforce proper synchronization.
typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, Constant *>
&InsertVal,
bool &Exists) {
@@ -1131,19 +1198,10 @@ private:
}
return I;
}
-public:
- /// getOrCreate - Return the specified constant from the map, creating it if
- /// necessary.
- ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
- MapKey Lookup(Ty, V);
- typename MapTy::iterator I = Map.find(Lookup);
- // Is it in the map?
- if (I != Map.end())
- return static_cast<ConstantClass *>(I->second);
-
- // If no preexisting value, create one now...
- ConstantClass *Result =
+ ConstantClass* Create(const TypeClass *Ty, const ValType &V,
+ typename MapTy::iterator I) {
+ ConstantClass* Result =
ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
assert(Result->getType() == Ty && "Type specified is not correct!");
@@ -1151,11 +1209,12 @@ public:
if (HasLargeKey) // Remember the reverse mapping if needed.
InverseMap.insert(std::make_pair(Result, I));
-
- // If the type of the constant is abstract, make sure that an entry exists
- // for it in the AbstractTypeMap.
+
+ // If the type of the constant is abstract, make sure that an entry
+ // exists for it in the AbstractTypeMap.
if (Ty->isAbstract()) {
- typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(Ty);
+ typename AbstractTypeMapTy::iterator TI =
+ AbstractTypeMap.find(Ty);
if (TI == AbstractTypeMap.end()) {
// Add ourselves to the ATU list of the type.
@@ -1164,10 +1223,33 @@ public:
AbstractTypeMap.insert(TI, std::make_pair(Ty, I));
}
}
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
+ sys::SmartScopedLock<true> Lock(&ValueMapLock);
+ MapKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = static_cast<ConstantClass *>(I->second);
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
return Result;
}
void remove(ConstantClass *CP) {
+ sys::SmartScopedLock<true> Lock(&ValueMapLock);
typename MapTy::iterator I = FindExistingElement(CP);
assert(I != Map.end() && "Constant not found in constant table!");
assert(I->second == CP && "Didn't find correct element?");
@@ -1221,6 +1303,8 @@ public:
/// MoveConstantToNewSlot - If we are about to change C to be the element
/// specified by I, update our internal data structures to reflect this
/// fact.
+ /// NOTE: This function is not locked. It is the responsibility of the
+ /// caller to enforce proper synchronization if using this method.
void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
// First, remove the old location of the specified constant in the map.
typename MapTy::iterator OldI = FindExistingElement(C);
@@ -1250,6 +1334,7 @@ public:
}
void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
+ sys::SmartScopedLock<true> Lock(&ValueMapLock);
typename AbstractTypeMapTy::iterator I =
AbstractTypeMap.find(cast<Type>(OldTy));
@@ -1314,12 +1399,15 @@ static char getValType(ConstantAggregateZero *CPZ) { return 0; }
ConstantAggregateZero *ConstantAggregateZero::get(const Type *Ty) {
assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) &&
"Cannot create an aggregate zero of non-aggregate type!");
+
+ // Implicitly locked.
return AggZeroConstants->getOrCreate(Ty, 0);
}
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantAggregateZero::destroyConstant() {
+ // Implicitly locked.
AggZeroConstants->remove(this);
destroyConstantImpl();
}
@@ -1359,18 +1447,24 @@ Constant *ConstantArray::get(const ArrayType *Ty,
// If this is an all-zero array, return a ConstantAggregateZero object
if (!V.empty()) {
Constant *C = V[0];
- if (!C->isNullValue())
+ if (!C->isNullValue()) {
+ // Implicitly locked.
return ArrayConstants->getOrCreate(Ty, V);
+ }
for (unsigned i = 1, e = V.size(); i != e; ++i)
- if (V[i] != C)
+ if (V[i] != C) {
+ // Implicitly locked.
return ArrayConstants->getOrCreate(Ty, V);
+ }
}
+
return ConstantAggregateZero::get(Ty);
}
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantArray::destroyConstant() {
+ // Implicitly locked.
ArrayConstants->remove(this);
destroyConstantImpl();
}
@@ -1482,6 +1576,7 @@ Constant *ConstantStruct::get(const StructType *Ty,
// Create a ConstantAggregateZero value if all elements are zeros...
for (unsigned i = 0, e = V.size(); i != e; ++i)
if (!V[i]->isNullValue())
+ // Implicitly locked.
return StructConstants->getOrCreate(Ty, V);
return ConstantAggregateZero::get(Ty);
@@ -1498,6 +1593,7 @@ Constant *ConstantStruct::get(const std::vector<Constant*> &V, bool packed) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantStruct::destroyConstant() {
+ // Implicitly locked.
StructConstants->remove(this);
destroyConstantImpl();
}
@@ -1552,6 +1648,8 @@ Constant *ConstantVector::get(const VectorType *Ty,
return ConstantAggregateZero::get(Ty);
if (isUndef)
return UndefValue::get(Ty);
+
+ // Implicitly locked.
return VectorConstants->getOrCreate(Ty, V);
}
@@ -1563,6 +1661,7 @@ Constant *ConstantVector::get(const std::vector<Constant*> &V) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantVector::destroyConstant() {
+ // Implicitly locked.
VectorConstants->remove(this);
destroyConstantImpl();
}
@@ -1627,12 +1726,14 @@ static char getValType(ConstantPointerNull *) {
ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
+ // Implicitly locked.
return NullPtrConstants->getOrCreate(Ty, 0);
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantPointerNull::destroyConstant() {
+ // Implicitly locked.
NullPtrConstants->remove(this);
destroyConstantImpl();
}
@@ -1670,12 +1771,14 @@ static char getValType(UndefValue *) {
UndefValue *UndefValue::get(const Type *Ty) {
+ // Implicitly locked.
return UndefValueConstants->getOrCreate(Ty, 0);
}
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
+ // Implicitly locked.
UndefValueConstants->remove(this);
destroyConstantImpl();
}
@@ -1690,15 +1793,18 @@ MDString::MDString(const char *begin, const char *end)
static ManagedStatic<StringMap<MDString*> > MDStringCache;
MDString *MDString::get(const char *StrBegin, const char *StrEnd) {
- StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(StrBegin,
- StrEnd);
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(
+ StrBegin, StrEnd);
MDString *&S = Entry.getValue();
if (!S) S = new MDString(Entry.getKeyData(),
Entry.getKeyData() + Entry.getKeyLength());
+
return S;
}
void MDString::destroyConstant() {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd));
destroyConstantImpl();
}
@@ -1724,18 +1830,27 @@ MDNode *MDNode::get(Value*const* Vals, unsigned NumVals) {
for (unsigned i = 0; i != NumVals; ++i)
ID.AddPointer(Vals[i]);
+ ConstantsLock->reader_acquire();
void *InsertPoint;
- if (MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint))
- return N;
-
- // InsertPoint will have been set by the FindNodeOrInsertPos call.
- MDNode *N = new(0) MDNode(Vals, NumVals);
- MDNodeSet->InsertNode(N, InsertPoint);
+ MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint);
+ ConstantsLock->reader_release();
+
+ if (!N) {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint);
+ if (!N) {
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ N = new(0) MDNode(Vals, NumVals);
+ MDNodeSet->InsertNode(N, InsertPoint);
+ }
+ }
return N;
}
void MDNode::destroyConstant() {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
MDNodeSet->RemoveNode(this);
+
destroyConstantImpl();
}
@@ -1902,6 +2017,8 @@ static inline Constant *getFoldedCast(
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> argVec(1, C);
ExprMapKeyType Key(opc, argVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(Ty, Key);
}
@@ -1932,19 +2049,19 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
}
Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) {
- if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getCast(Instruction::BitCast, C, Ty);
return getCast(Instruction::ZExt, C, Ty);
}
Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) {
- if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getCast(Instruction::BitCast, C, Ty);
return getCast(Instruction::SExt, C, Ty);
}
Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
- if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getCast(Instruction::BitCast, C, Ty);
return getCast(Instruction::Trunc, C, Ty);
}
@@ -1960,9 +2077,10 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
bool isSigned) {
- assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ assert(C->getType()->isIntOrIntVector() &&
+ Ty->isIntOrIntVector() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::Trunc :
@@ -1971,10 +2089,10 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
}
Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
"Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
if (SrcBits == DstBits)
return C; // Avoid a useless cast
Instruction::CastOps opcode =
@@ -1983,42 +2101,67 @@ Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
}
Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) {
- assert(C->getType()->isInteger() && "Trunc operand must be integer");
- assert(Ty->isInteger() && "Trunc produces only integral");
- assert(C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+  assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer");
+  assert(Ty->isIntOrIntVector() && "Trunc produces only integer");
+  assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits() &&
"SrcTy must be larger than DestTy for Trunc!");
return getFoldedCast(Instruction::Trunc, C, Ty);
}
Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) {
- assert(C->getType()->isInteger() && "SEXt operand must be integral");
- assert(Ty->isInteger() && "SExt produces only integer");
- assert(C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+  assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral");
+  assert(Ty->isIntOrIntVector() && "SExt produces only integer");
+  assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"SrcTy must be smaller than DestTy for SExt!");
return getFoldedCast(Instruction::SExt, C, Ty);
}
Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) {
- assert(C->getType()->isInteger() && "ZEXt operand must be integral");
- assert(Ty->isInteger() && "ZExt produces only integer");
- assert(C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+  assert(C->getType()->isIntOrIntVector() && "ZExt operand must be integral");
+  assert(Ty->isIntOrIntVector() && "ZExt produces only integer");
+  assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"SrcTy must be smaller than DestTy for ZExt!");
return getFoldedCast(Instruction::ZExt, C, Ty);
}
Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
- C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+  assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+         C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits() &&
"This is an illegal floating point truncation!");
return getFoldedCast(Instruction::FPTrunc, C, Ty);
}
Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
- C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+  assert((fromVec == toVec) && "Cannot convert between scalar and vector");
+  assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+         C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
"This is an illegal floating point extension!");
return getFoldedCast(Instruction::FPExt, C, Ty);
}
@@ -2136,6 +2279,8 @@ Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode,
std::vector<Constant*> argVec(1, C1); argVec.push_back(C2);
ExprMapKeyType Key(Opcode, argVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2188,34 +2333,30 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) {
case Instruction::UDiv:
case Instruction::SDiv:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) &&
- cast<VectorType>(C1->getType())->getElementType()->isInteger())) &&
+ assert(C1->getType()->isIntOrIntVector() &&
"Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::FDiv:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType())
- && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint()))
- && "Tried to create an arithmetic operation on a non-arithmetic type!");
+ assert(C1->getType()->isFPOrFPVector() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::URem:
case Instruction::SRem:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) &&
- cast<VectorType>(C1->getType())->getElementType()->isInteger())) &&
+ assert(C1->getType()->isIntOrIntVector() &&
"Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::FRem:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType())
- && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint()))
- && "Tried to create an arithmetic operation on a non-arithmetic type!");
+ assert(C1->getType()->isFPOrFPVector() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isInteger() || isa<VectorType>(C1->getType())) &&
+ assert(C1->getType()->isIntOrIntVector() &&
"Tried to create a logical operation on a non-integral type!");
break;
case Instruction::Shl:
@@ -2251,6 +2392,8 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
argVec[1] = V1;
argVec[2] = V2;
ExprMapKeyType Key(Instruction::Select, argVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2274,6 +2417,8 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
for (unsigned i = 0; i != NumIdx; ++i)
ArgVec.push_back(cast<Constant>(Idxs[i]));
const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2308,6 +2453,8 @@ ConstantExpr::getICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
ArgVec.push_back(RHS);
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(Type::Int1Ty, Key);
}
@@ -2325,6 +2472,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
ArgVec.push_back(RHS);
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(Type::Int1Ty, Key);
}
@@ -2370,6 +2519,8 @@ ConstantExpr::getVICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
ArgVec.push_back(RHS);
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::VICmp, ArgVec, pred);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(LHS->getType(), Key);
}
@@ -2417,6 +2568,8 @@ ConstantExpr::getVFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
ArgVec.push_back(RHS);
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::VFCmp, ArgVec, pred);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ResultTy, Key);
}
@@ -2428,6 +2581,8 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
std::vector<Constant*> ArgVec(1, Val);
ArgVec.push_back(Idx);
const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2449,6 +2604,8 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
ArgVec.push_back(Elt);
ArgVec.push_back(Idx);
const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2472,6 +2629,8 @@ Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1,
ArgVec.push_back(V2);
ArgVec.push_back(Mask);
const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2555,6 +2714,7 @@ Constant *ConstantExpr::getZeroValueForNegationExpr(const Type *Ty) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantExpr::destroyConstant() {
+ // Implicitly locked.
ExprConstants->remove(this);
destroyConstantImpl();
}
@@ -2619,6 +2779,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Replacement = ConstantAggregateZero::get(getType());
} else {
// Check to see if we have this array type already.
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
bool Exists;
ArrayConstantsTy::MapTy::iterator I =
ArrayConstants->InsertOrGetItem(Lookup, Exists);
@@ -2694,6 +2855,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
Replacement = ConstantAggregateZero::get(getType());
} else {
// Check to see if we have this array type already.
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
bool Exists;
StructConstantsTy::MapTy::iterator I =
StructConstants->InsertOrGetItem(Lookup, Exists);
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 54bd895fd409..045056643b54 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -16,7 +16,10 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/StringPool.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
@@ -230,17 +233,21 @@ void Function::removeAttribute(unsigned i, Attributes attr) {
// use GC.
static DenseMap<const Function*,PooledStringPtr> *GCNames;
static StringPool *GCNamePool;
+static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
bool Function::hasGC() const {
+ sys::SmartScopedReader<true> Reader(&*GCLock);
return GCNames && GCNames->count(this);
}
const char *Function::getGC() const {
assert(hasGC() && "Function has no collector");
+ sys::SmartScopedReader<true> Reader(&*GCLock);
return *(*GCNames)[this];
}
void Function::setGC(const char *Str) {
+ sys::SmartScopedWriter<true> Writer(&*GCLock);
if (!GCNamePool)
GCNamePool = new StringPool();
if (!GCNames)
@@ -249,6 +256,7 @@ void Function::setGC(const char *Str) {
}
void Function::clearGC() {
+ sys::SmartScopedWriter<true> Writer(&*GCLock);
if (GCNames) {
GCNames->erase(this);
if (GCNames->empty()) {
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 4c228fe81c0a..6a6424d39dd2 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -1310,7 +1310,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
return false;// Second operand of insertelement must be vector element type.
if (Index->getType() != Type::Int32Ty)
- return false; // Third operand of insertelement must be uint.
+ return false; // Third operand of insertelement must be i32.
return true;
}
@@ -1576,9 +1576,8 @@ void BinaryOperator::init(BinaryOps iType) {
case FDiv:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) &&
- cast<VectorType>(getType())->getElementType()->isFloatingPoint()))
- && "Incorrect operand type (not floating point) for FDIV");
+ assert(getType()->isFPOrFPVector() &&
+ "Incorrect operand type (not floating point) for FDIV");
break;
case URem:
case SRem:
@@ -1591,9 +1590,8 @@ void BinaryOperator::init(BinaryOps iType) {
case FRem:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) &&
- cast<VectorType>(getType())->getElementType()->isFloatingPoint()))
- && "Incorrect operand type (not floating point) for FREM");
+ assert(getType()->isFPOrFPVector() &&
+ "Incorrect operand type (not floating point) for FREM");
break;
case Shl:
case LShr:
@@ -1837,11 +1835,11 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const {
case Instruction::BitCast:
return true; // BitCast never modifies bits.
case Instruction::PtrToInt:
- return IntPtrTy->getPrimitiveSizeInBits() ==
- getType()->getPrimitiveSizeInBits();
+ return IntPtrTy->getScalarSizeInBits() ==
+ getType()->getScalarSizeInBits();
case Instruction::IntToPtr:
- return IntPtrTy->getPrimitiveSizeInBits() ==
- getOperand(0)->getType()->getPrimitiveSizeInBits();
+ return IntPtrTy->getScalarSizeInBits() ==
+ getOperand(0)->getType()->getScalarSizeInBits();
}
}
@@ -1880,8 +1878,8 @@ unsigned CastInst::isEliminableCastPair(
// BITCONVERT = FirstClass n/a FirstClass n/a
//
// NOTE: some transforms are safe, but we consider them to be non-profitable.
- // For example, we could merge "fptoui double to uint" + "zext uint to ulong",
- // into "fptoui double to ulong", but this loses information about the range
+ // For example, we could merge "fptoui double to i32" + "zext i32 to i64",
+ // into "fptoui double to i64", but this loses information about the range
// of the produced value (we no longer know the top-part is all zeros).
// Further this conversion is often much more expensive for typical hardware,
// and causes issues when building libgcc. We disallow fptosi+sext for the
@@ -1946,8 +1944,8 @@ unsigned CastInst::isEliminableCastPair(
return 0;
case 7: {
// ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
- unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits();
- unsigned MidSize = MidTy->getPrimitiveSizeInBits();
+ unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+ unsigned MidSize = MidTy->getScalarSizeInBits();
if (MidSize >= PtrSize)
return Instruction::BitCast;
return 0;
@@ -1956,8 +1954,8 @@ unsigned CastInst::isEliminableCastPair(
// ext, trunc -> bitcast, if the SrcTy and DstTy are same size
// ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
// ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
- unsigned SrcSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DstSize = DstTy->getPrimitiveSizeInBits();
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
if (SrcSize == DstSize)
return Instruction::BitCast;
else if (SrcSize < DstSize)
@@ -1985,9 +1983,9 @@ unsigned CastInst::isEliminableCastPair(
return 0;
case 13: {
// inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
- unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits();
- unsigned SrcSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DstSize = DstTy->getPrimitiveSizeInBits();
+ unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
if (SrcSize <= PtrSize && SrcSize == DstSize)
return Instruction::BitCast;
return 0;
@@ -2051,7 +2049,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
Instruction *InsertBefore) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
}
@@ -2059,7 +2057,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
BasicBlock *InsertAtEnd) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
}
@@ -2067,7 +2065,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
Instruction *InsertBefore) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
}
@@ -2075,7 +2073,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
BasicBlock *InsertAtEnd) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
}
@@ -2083,7 +2081,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
Instruction *InsertBefore) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
}
@@ -2091,7 +2089,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
BasicBlock *InsertAtEnd) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
}
@@ -2125,8 +2123,8 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
bool isSigned, const std::string &Name,
Instruction *InsertBefore) {
assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::Trunc :
@@ -2137,9 +2135,10 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
bool isSigned, const std::string &Name,
BasicBlock *InsertAtEnd) {
- assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::Trunc :
@@ -2150,10 +2149,10 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
const std::string &Name,
Instruction *InsertBefore) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
"Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
@@ -2163,10 +2162,10 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
const std::string &Name,
BasicBlock *InsertAtEnd) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
"Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
@@ -2183,8 +2182,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
return true;
// Get the bit sizes, we'll need these
- unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
- unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
+ unsigned SrcBits = SrcTy->getScalarSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr
// Run through the possibilities ...
if (DestTy->isInteger()) { // Casting to integral
@@ -2242,8 +2241,8 @@ CastInst::getCastOpcode(
const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) {
// Get the bit sizes, we'll need these
const Type *SrcTy = Src->getType();
- unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
- unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
+ unsigned SrcBits = SrcTy->getScalarSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr
assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
"Only first class types are castable!");
@@ -2344,8 +2343,8 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
return false;
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DstBitSize = DstTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DstBitSize = DstTy->getScalarSizeInBits();
// Switch on the opcode provided
switch (op) {
@@ -2400,7 +2399,7 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
// Now we know we're not dealing with a pointer/non-pointer mismatch. In all
// these cases, the cast is okay if the source and destination bit widths
// are identical.
- return SrcBitSize == DstBitSize;
+ return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
}
}
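The blanket switch from getPrimitiveSizeInBits to getScalarSizeInBits is what lets these cast helpers treat a vector like its element type: on <4 x i16>, the former reports the 64-bit whole-vector width while the latter reports the 16 bits that Trunc/ZExt legality actually depends on. A sketch, assuming the getScalarSizeInBits accessor this patch series adds to Type:

    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    void widths() {
      const VectorType *V4i16 = VectorType::get(Type::Int16Ty, 4);
      unsigned Whole = V4i16->getPrimitiveSizeInBits();  // 64, whole vector
      unsigned Lane  = V4i16->getScalarSizeInBits();     // 16, one element
      (void)Whole; (void)Lane;
    }

The one deliberate exception is castIsValid's final comparison, which stays on whole-type sizes because a bitcast must preserve the total bit count, not the lane width.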
diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp
index 1bf917155383..b5926bcf441a 100644
--- a/lib/VMCore/LeakDetector.cpp
+++ b/lib/VMCore/LeakDetector.cpp
@@ -14,7 +14,10 @@
#include "llvm/Support/LeakDetector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Streams.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include "llvm/Value.h"
using namespace llvm;
@@ -29,16 +32,29 @@ namespace {
static void print(const Value* P) { cerr << *P; }
};
+ ManagedStatic<sys::SmartRWMutex<true> > LeakDetectorLock;
+
template <typename T>
struct VISIBILITY_HIDDEN LeakDetectorImpl {
- explicit LeakDetectorImpl(const char* const name) : Cache(0), Name(name) { }
+ explicit LeakDetectorImpl(const char* const name = "") :
+ Cache(0), Name(name) { }
+ void clear() {
+ Cache = 0;
+ Ts.clear();
+ }
+
+ void setName(const char* n) {
+ Name = n;
+ }
+
// Because the most common usage pattern, by far, is to add a
// garbage object, then remove it immediately, we optimize this
// case. When an object is added, it is not added to the set
// immediately, it is added to the CachedValue Value. If it is
// immediately removed, no set search need be performed.
void addGarbage(const T* o) {
+ sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock);
if (Cache) {
assert(Ts.count(Cache) == 0 && "Object already in set!");
Ts.insert(Cache);
@@ -47,6 +63,7 @@ namespace {
}
void removeGarbage(const T* o) {
+ sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock);
if (o == Cache)
Cache = 0; // Cache hit
else
@@ -56,6 +73,7 @@ namespace {
bool hasGarbage(const std::string& Message) {
addGarbage(0); // Flush the Cache
+ sys::SmartScopedReader<true> Reader(&*LeakDetectorLock);
assert(Cache == 0 && "No value should be cached anymore!");
if (!Ts.empty()) {
@@ -70,58 +88,48 @@ namespace {
return true;
}
+
return false;
}
private:
SmallPtrSet<const T*, 8> Ts;
const T* Cache;
- const char* const Name;
+ const char* Name;
};
- static LeakDetectorImpl<void> *Objects;
- static LeakDetectorImpl<Value> *LLVMObjects;
-
- static LeakDetectorImpl<void> &getObjects() {
- if (Objects == 0)
- Objects = new LeakDetectorImpl<void>("GENERIC");
- return *Objects;
- }
-
- static LeakDetectorImpl<Value> &getLLVMObjects() {
- if (LLVMObjects == 0)
- LLVMObjects = new LeakDetectorImpl<Value>("LLVM");
- return *LLVMObjects;
- }
+ static ManagedStatic<LeakDetectorImpl<void> > Objects;
+ static ManagedStatic<LeakDetectorImpl<Value> > LLVMObjects;
static void clearGarbage() {
- delete Objects;
- delete LLVMObjects;
- Objects = 0;
- LLVMObjects = 0;
+ Objects->clear();
+ LLVMObjects->clear();
}
}
void LeakDetector::addGarbageObjectImpl(void *Object) {
- getObjects().addGarbage(Object);
+ Objects->addGarbage(Object);
}
void LeakDetector::addGarbageObjectImpl(const Value *Object) {
- getLLVMObjects().addGarbage(Object);
+ LLVMObjects->addGarbage(Object);
}
void LeakDetector::removeGarbageObjectImpl(void *Object) {
- getObjects().removeGarbage(Object);
+ Objects->removeGarbage(Object);
}
void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
- getLLVMObjects().removeGarbage(Object);
+ LLVMObjects->removeGarbage(Object);
}
void LeakDetector::checkForGarbageImpl(const std::string &Message) {
+ Objects->setName("GENERIC");
+ LLVMObjects->setName("LLVM");
+
// use non-short-circuit version so that both checks are performed
- if (getObjects().hasGarbage(Message) |
- getLLVMObjects().hasGarbage(Message))
+ if (Objects->hasGarbage(Message) |
+ LLVMObjects->hasGarbage(Message))
cerr << "\nThis is probably because you removed an object, but didn't "
<< "delete it. Please check your code for memory leaks.\n";
diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp
index 0bd190ad4edf..1a68b890542f 100644
--- a/lib/VMCore/Mangler.cpp
+++ b/lib/VMCore/Mangler.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Mangler.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
+#include "llvm/System/Atomic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
@@ -164,8 +165,12 @@ std::string Mangler::getValueName(const GlobalValue *GV, const char * Suffix) {
} else if (!GV->hasName()) {
// Must mangle the global into a unique ID.
unsigned TypeUniqueID = getTypeID(GV->getType());
- static unsigned GlobalID = 0;
- Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(GlobalID++);
+ static uint32_t GlobalID = 0;
+
+    // AtomicIncrement returns the incremented value, so the previous
+    // (unique) ID is one less; a separate read followed by an increment
+    // would let two threads observe the same ID.
+    unsigned OldID = sys::AtomicIncrement(&GlobalID) - 1;
+
+ Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(OldID);
} else {
if (GV->hasPrivateLinkage())
Name = makeNameProper(GV->getName() + Suffix, Prefix, PrivatePrefix);
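
[Editor's note] The essential property in the Mangler hunk is that each caller's ID must come from one indivisible fetch-and-increment; as noted in the corrected lines above, a plain read followed by a separate increment would let two threads mint the same name. A standalone sketch with std::atomic in place of sys::AtomicIncrement:

```cpp
// Sketch: minting unique "__unnamed_N" suffixes from multiple threads.
#include <atomic>
#include <string>

static std::atomic<unsigned> GlobalID{0};

std::string nextUnnamed() {
  // fetch_add returns the value *before* the increment, so each caller
  // gets a distinct ID with no window between the read and the bump.
  unsigned ID = GlobalID.fetch_add(1, std::memory_order_relaxed);
  return "__unnamed_" + std::to_string(ID);
}
```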
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 6db5d7e24c5e..e943e31b1ed5 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -19,6 +19,8 @@
#include "llvm/ModuleProvider.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/System/Atomic.h"
+#include "llvm/System/Threading.h"
#include <algorithm>
#include <map>
#include <set>
@@ -192,8 +194,26 @@ static std::vector<PassRegistrationListener*> *Listeners = 0;
// resurrection after llvm_shutdown is run.
static PassRegistrar *getPassRegistrar() {
static PassRegistrar *PassRegistrarObj = 0;
- if (!PassRegistrarObj)
+
+ // Use double-checked locking to safely initialize the registrar when
+ // we're running in multithreaded mode.
+ PassRegistrar* tmp = PassRegistrarObj;
+ if (llvm_is_multithreaded()) {
+ sys::MemoryFence();
+ if (!tmp) {
+ llvm_acquire_global_lock();
+ tmp = PassRegistrarObj;
+ if (!tmp) {
+ tmp = new PassRegistrar();
+ sys::MemoryFence();
+ PassRegistrarObj = tmp;
+ }
+ llvm_release_global_lock();
+ }
+ } else if (!tmp) {
PassRegistrarObj = new PassRegistrar();
+ }
+
return PassRegistrarObj;
}
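
[Editor's note] getPassRegistrar is textbook double-checked locking: a fast read, then a recheck under the global lock before constructing, with fences ordering publication of the pointer. The same shape, compilable, with std::atomic and std::mutex replacing the explicit MemoryFence/global-lock calls:

```cpp
// Sketch: double-checked locking. The second null check under the lock
// is what keeps two racing threads from both constructing the object.
#include <atomic>
#include <mutex>

struct Registrar { /* registry state elided */ };

Registrar *getRegistrar() {
  static std::atomic<Registrar*> Obj{nullptr};
  static std::mutex Lock;

  Registrar *Tmp = Obj.load(std::memory_order_acquire);
  if (!Tmp) {
    std::lock_guard<std::mutex> G(Lock);
    Tmp = Obj.load(std::memory_order_relaxed);
    if (!Tmp) {  // recheck: another thread may have beaten us here
      Tmp = new Registrar();
      Obj.store(Tmp, std::memory_order_release);  // publish fully built object
    }
  }
  return Tmp;
}
```

With C++11 or later, a function-local `static Registrar Obj;` gives the same thread-safe one-time construction for free; the hand-rolled version here predates that guarantee.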
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 47999152c1d2..86cf10e67337 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/Streams.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/Threading.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm-c/Core.h"
#include <algorithm>
@@ -355,6 +357,9 @@ namespace {
/// amount of time each pass takes to execute. This only happens when
/// -time-passes is enabled on the command line.
///
+
+static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
+
class VISIBILITY_HIDDEN TimingInfo {
std::map<Pass*, Timer> TimingData;
TimerGroup TG;
@@ -379,15 +384,18 @@ public:
if (dynamic_cast<PMDataManager *>(P))
return;
+ sys::SmartScopedLock<true> Lock(&*TimingInfoMutex);
std::map<Pass*, Timer>::iterator I = TimingData.find(P);
if (I == TimingData.end())
I=TimingData.insert(std::make_pair(P, Timer(P->getPassName(), TG))).first;
I->second.startTimer();
}
+
void passEnded(Pass *P) {
if (dynamic_cast<PMDataManager *>(P))
return;
+ sys::SmartScopedLock<true> Lock(&*TimingInfoMutex);
std::map<Pass*, Timer>::iterator I = TimingData.find(P);
assert(I != TimingData.end() && "passStarted/passEnded not nested right!");
I->second.stopTimer();
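
[Editor's note] In TimingInfo, the lock has to cover the whole find-or-insert plus the timer update, or two threads reporting the same pass could both insert into TimingData. A minimal sketch with std::mutex and std::chrono standing in for sys::SmartMutex and LLVM's Timer:

```cpp
// Sketch: per-key timers in a map, with find-or-insert and the time
// update performed under a single scoped lock.
#include <chrono>
#include <map>
#include <mutex>
#include <string>

class TimingInfo {
  using Clock = std::chrono::steady_clock;
  struct Timer { Clock::time_point Start; Clock::duration Total{}; };

  std::map<std::string, Timer> TimingData;
  std::mutex Lock;

public:
  void passStarted(const std::string &Name) {
    std::lock_guard<std::mutex> G(Lock);  // guards the find-or-insert too
    TimingData[Name].Start = Clock::now();
  }
  void passEnded(const std::string &Name) {
    std::lock_guard<std::mutex> G(Lock);
    Timer &T = TimingData.at(Name);  // passStarted must have run first
    T.Total += Clock::now() - T.Start;
  }
};
```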
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index a1e6c42f86fa..5df7f1205051 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -23,6 +23,9 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include <algorithm>
#include <cstdarg>
using namespace llvm;
@@ -40,6 +43,14 @@ AbstractTypeUser::~AbstractTypeUser() {}
// Type Class Implementation
//===----------------------------------------------------------------------===//
+// Reader/writer lock used for guarding access to the type maps.
+static ManagedStatic<sys::SmartRWMutex<true> > TypeMapLock;
+
+// Recursive lock used for guarding access to AbstractTypeUsers.
+// NOTE: The true template parameter means this will no-op when we're not in
+// multithreaded mode.
+static ManagedStatic<sys::SmartMutex<true> > AbstractTypeUsersLock;
+
// Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
// for types as they are needed. Because resolution of types must invalidate
// all of the abstract type descriptions, we keep them in a separate map to make
@@ -112,6 +123,14 @@ const Type *Type::getVAArgsPromotedType() const {
return this;
}
+/// getScalarType - If this is a vector type, return the element type,
+/// otherwise return this.
+const Type *Type::getScalarType() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType();
+ return this;
+}
+
/// isIntOrIntVector - Return true if this is an integer type or a vector of
/// integer types.
///
@@ -174,6 +193,28 @@ unsigned Type::getPrimitiveSizeInBits() const {
}
}
+/// getScalarSizeInBits - If this is a vector type, return the
+/// getPrimitiveSizeInBits value for the element type. Otherwise return the
+/// getPrimitiveSizeInBits value for this type.
+unsigned Type::getScalarSizeInBits() const {
+ return getScalarType()->getPrimitiveSizeInBits();
+}
+
+/// getFPMantissaWidth - Return the width of the mantissa of this type. This
+/// is only valid on floating point types. If the FP type does not
+/// have a stable mantissa (e.g. ppc long double), this method returns -1.
+int Type::getFPMantissaWidth() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->getFPMantissaWidth();
+ assert(isFloatingPoint() && "Not a floating point type!");
+ if (ID == FloatTyID) return 24;
+ if (ID == DoubleTyID) return 53;
+ if (ID == X86_FP80TyID) return 64;
+ if (ID == FP128TyID) return 113;
+ assert(ID == PPC_FP128TyID && "unknown fp type");
+ return -1;
+}
+
/// isSizedDerivedType - Derived types like structures and arrays are sized
/// iff all of the members of the type are sized as well. Since asking for
/// their size is relatively uncommon, move this operation out of line.
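
[Editor's note] getScalarType and getScalarSizeInBits are the helpers the cast hunks earlier in this patch rely on: any width question about a vector is redirected to its element type. In sketch form (plain structs, not llvm::Type):

```cpp
// Sketch: redirect size queries on a vector to its element type.
#include <cassert>

struct Ty {
  unsigned PrimitiveBits;  // total width: 128 for <4 x i32>, 32 for i32
  const Ty *Element;       // element type for vectors, null otherwise

  const Ty *getScalarType() const { return Element ? Element : this; }
  unsigned getScalarSizeInBits() const {
    return getScalarType()->PrimitiveBits;
  }
};

int main() {
  Ty I32 = {32, nullptr};
  Ty V4I32 = {128, &I32};
  // Both report 32, so a checker can treat <4 x i32> exactly like i32
  // instead of comparing total widths.
  assert(I32.getScalarSizeInBits() == 32);
  assert(V4I32.getScalarSizeInBits() == 32);
  return 0;
}
```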
@@ -414,8 +455,29 @@ void DerivedType::dropAllTypeUses() {
if (NumContainedTys != 0) {
// The type must stay abstract. To do this, we insert a pointer to a type
// that will never get resolved, thus will always be abstract.
- static Type *AlwaysOpaqueTy = OpaqueType::get();
- static PATypeHolder Holder(AlwaysOpaqueTy);
+ static Type *AlwaysOpaqueTy = 0;
+ static PATypeHolder* Holder = 0;
+ Type *tmp = AlwaysOpaqueTy;
+ if (llvm_is_multithreaded()) {
+ sys::MemoryFence();
+ if (!tmp) {
+ llvm_acquire_global_lock();
+ tmp = AlwaysOpaqueTy;
+ if (!tmp) {
+ tmp = OpaqueType::get();
+          PATypeHolder* tmp2 = new PATypeHolder(tmp); // use tmp: AlwaysOpaqueTy is still null here
+ sys::MemoryFence();
+ AlwaysOpaqueTy = tmp;
+ Holder = tmp2;
+ }
+
+ llvm_release_global_lock();
+ }
+  } else if (!tmp) {
+ AlwaysOpaqueTy = OpaqueType::get();
+ Holder = new PATypeHolder(AlwaysOpaqueTy);
+ }
+
ContainedTys[0] = AlwaysOpaqueTy;
// Change the rest of the types to be Int32Ty's. It doesn't matter what we
@@ -818,7 +880,7 @@ public:
// We already have this type in the table. Get rid of the newly refined
// type.
TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
- Ty->refineAbstractTypeTo(NewTy);
+ Ty->unlockedRefineAbstractTypeTo(NewTy);
return;
}
} else {
@@ -854,7 +916,7 @@ public:
}
TypesByHash.erase(Entry);
}
- Ty->refineAbstractTypeTo(NewTy);
+ Ty->unlockedRefineAbstractTypeTo(NewTy);
return;
}
}
@@ -938,15 +1000,30 @@ const IntegerType *IntegerType::get(unsigned NumBits) {
default:
break;
}
-
+
IntegerValType IVT(NumBits);
- IntegerType *ITy = IntegerTypes->get(IVT);
- if (ITy) return ITy; // Found a match, return it!
-
- // Value not found. Derive a new type!
- ITy = new IntegerType(NumBits);
- IntegerTypes->add(IVT, ITy);
-
+ IntegerType *ITy = 0;
+
+ // First, see if the type is already in the table, for which
+ // a reader lock suffices.
+ TypeMapLock->reader_acquire();
+ ITy = IntegerTypes->get(IVT);
+ TypeMapLock->reader_release();
+
+ if (!ITy) {
+ // OK, not in the table, get a writer lock.
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ ITy = IntegerTypes->get(IVT);
+
+ // We need to _recheck_ the table in case someone
+ // put it in between when we released the reader lock
+ // and when we gained the writer lock!
+ if (!ITy) {
+ // Value not found. Derive a new type!
+ ITy = new IntegerType(NumBits);
+ IntegerTypes->add(IVT, ITy);
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *ITy << "\n";
#endif
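
[Editor's note] Every uniquing table below (IntegerTypes, FunctionTypes, ArrayTypes, VectorTypes, StructTypes, PointerTypes) now follows the same shape: probe under a shared reader lock, and on a miss take the writer lock and probe again, since another thread may have inserted between the two acquisitions. The pattern in isolation, with std::shared_mutex:

```cpp
// Sketch: read-mostly get-or-create. The second lookup under the writer
// lock is mandatory; without it two threads could each insert a value.
#include <map>
#include <mutex>
#include <shared_mutex>

static std::shared_mutex MapLock;
static std::map<unsigned, int*> Table;

int *getOrCreate(unsigned Key) {
  {
    std::shared_lock<std::shared_mutex> R(MapLock);  // readers overlap freely
    auto I = Table.find(Key);
    if (I != Table.end()) return I->second;
  } // reader lock released here; a writer may sneak in now
  std::unique_lock<std::shared_mutex> W(MapLock);
  auto I = Table.find(Key);  // recheck under the writer lock
  if (I != Table.end()) return I->second;
  return Table[Key] = new int(Key);  // never freed: mirrors uniqued types
}
```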
@@ -1010,14 +1087,26 @@ FunctionType *FunctionType::get(const Type *ReturnType,
const std::vector<const Type*> &Params,
bool isVarArg) {
FunctionValType VT(ReturnType, Params, isVarArg);
- FunctionType *FT = FunctionTypes->get(VT);
- if (FT)
- return FT;
-
- FT = (FunctionType*) operator new(sizeof(FunctionType) +
- sizeof(PATypeHandle)*(Params.size()+1));
- new (FT) FunctionType(ReturnType, Params, isVarArg);
- FunctionTypes->add(VT, FT);
+ FunctionType *FT = 0;
+
+ TypeMapLock->reader_acquire();
+ FT = FunctionTypes->get(VT);
+ TypeMapLock->reader_release();
+
+ if (!FT) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+
+ // Have to check again here, because it might have
+    // been inserted between when we released the reader
+ // lock and when we acquired the writer lock.
+ FT = FunctionTypes->get(VT);
+ if (!FT) {
+ FT = (FunctionType*) operator new(sizeof(FunctionType) +
+ sizeof(PATypeHandle)*(Params.size()+1));
+ new (FT) FunctionType(ReturnType, Params, isVarArg);
+ FunctionTypes->add(VT, FT);
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << FT << "\n";
@@ -1049,20 +1138,30 @@ public:
}
};
}
-static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
+static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
assert(ElementType && "Can't get array of <null> types!");
assert(isValidElementType(ElementType) && "Invalid type for array element!");
ArrayValType AVT(ElementType, NumElements);
- ArrayType *AT = ArrayTypes->get(AVT);
- if (AT) return AT; // Found a match, return it!
-
- // Value not found. Derive a new type!
- ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
-
+ ArrayType *AT = 0;
+
+ TypeMapLock->reader_acquire();
+ AT = ArrayTypes->get(AVT);
+ TypeMapLock->reader_release();
+
+ if (!AT) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+
+ // Recheck. Might have changed between release and acquire.
+ AT = ArrayTypes->get(AVT);
+ if (!AT) {
+ // Value not found. Derive a new type!
+ ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *AT << "\n";
#endif
@@ -1106,19 +1205,27 @@ public:
}
};
}
-static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
+static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
assert(ElementType && "Can't get vector of <null> types!");
VectorValType PVT(ElementType, NumElements);
- VectorType *PT = VectorTypes->get(PVT);
- if (PT) return PT; // Found a match, return it!
-
- // Value not found. Derive a new type!
- VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
-
+ VectorType *PT = 0;
+
+ TypeMapLock->reader_acquire();
+ PT = VectorTypes->get(PVT);
+ TypeMapLock->reader_release();
+
+ if (!PT) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ PT = VectorTypes->get(PVT);
+ // Recheck. Might have changed between release and acquire.
+ if (!PT) {
+ VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *PT << "\n";
#endif
@@ -1173,15 +1280,24 @@ static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;
StructType *StructType::get(const std::vector<const Type*> &ETypes,
bool isPacked) {
StructValType STV(ETypes, isPacked);
- StructType *ST = StructTypes->get(STV);
- if (ST) return ST;
-
- // Value not found. Derive a new type!
- ST = (StructType*) operator new(sizeof(StructType) +
- sizeof(PATypeHandle) * ETypes.size());
- new (ST) StructType(ETypes, isPacked);
- StructTypes->add(STV, ST);
-
+ StructType *ST = 0;
+
+ TypeMapLock->reader_acquire();
+ ST = StructTypes->get(STV);
+ TypeMapLock->reader_release();
+
+ if (!ST) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ ST = StructTypes->get(STV);
+ // Recheck. Might have changed between release and acquire.
+ if (!ST) {
+ // Value not found. Derive a new type!
+ ST = (StructType*) operator new(sizeof(StructType) +
+ sizeof(PATypeHandle) * ETypes.size());
+ new (ST) StructType(ETypes, isPacked);
+ StructTypes->add(STV, ST);
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *ST << "\n";
#endif
@@ -1249,12 +1365,21 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
assert(isValidElementType(ValueType) && "Invalid type for pointer element!");
PointerValType PVT(ValueType, AddressSpace);
- PointerType *PT = PointerTypes->get(PVT);
- if (PT) return PT;
-
- // Value not found. Derive a new type!
- PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
-
+ PointerType *PT = 0;
+
+ TypeMapLock->reader_acquire();
+ PT = PointerTypes->get(PVT);
+ TypeMapLock->reader_release();
+
+ if (!PT) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ PT = PointerTypes->get(PVT);
+ // Recheck. Might have changed between release and acquire.
+ if (!PT) {
+ // Value not found. Derive a new type!
+ PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *PT << "\n";
#endif
@@ -1281,12 +1406,24 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
// Derived Type Refinement Functions
//===----------------------------------------------------------------------===//
+// addAbstractTypeUser - Notify an abstract type that there is a new user of
+// it. This function is called primarily by the PATypeHandle class.
+void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
+ assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
+ AbstractTypeUsersLock->acquire();
+ AbstractTypeUsers.push_back(U);
+ AbstractTypeUsersLock->release();
+}
+
+
// removeAbstractTypeUser - Notify an abstract type that a user of the class
// no longer has a handle to the type. This function is called primarily by
// the PATypeHandle class. When there are no users of the abstract type, it
// is annihilated, because there is no way to get a reference to it ever again.
//
void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
+ AbstractTypeUsersLock->acquire();
+
// Search from back to front because we will notify users from back to
// front. Also, it is likely that there will be a stack like behavior to
// users that register and unregister users.
@@ -1310,16 +1447,20 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
DOUT << "DELETEing unused abstract type: <" << *this
<< ">[" << (void*)this << "]" << "\n";
#endif
- this->destroy();
+
+ this->destroy();
}
+
+ AbstractTypeUsersLock->release();
}
-// refineAbstractTypeTo - This function is used when it is discovered that
-// the 'this' abstract type is actually equivalent to the NewType specified.
-// This causes all users of 'this' to switch to reference the more concrete type
-// NewType and for 'this' to be deleted.
+// unlockedRefineAbstractTypeTo - This function is used when it is discovered
+// that the 'this' abstract type is actually equivalent to the NewType
+// specified. This causes all users of 'this' to switch to reference the more
+// concrete type NewType and for 'this' to be deleted. Intended only for
+// internal callers; external callers go through refineAbstractTypeTo.
//
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
assert(this != NewType && "Can't refine to myself!");
assert(ForwardType == 0 && "This type has already been refined!");
@@ -1338,8 +1479,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
// refined, that we will not continue using a dead reference...
//
PATypeHolder NewTy(NewType);
-
- // Any PATypeHolders referring to this type will now automatically forward to
+  // Any PATypeHolders referring to this type will now automatically forward to
// the type we are resolved to.
ForwardType = NewType;
if (NewType->isAbstract())
@@ -1362,6 +1502,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
// will not cause users to drop off of the use list. If we resolve to ourself
// we succeed!
//
+ AbstractTypeUsersLock->acquire();
while (!AbstractTypeUsers.empty() && NewTy != this) {
AbstractTypeUser *User = AbstractTypeUsers.back();
@@ -1377,6 +1518,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
assert(AbstractTypeUsers.size() != OldSize &&
"AbsTyUser did not remove self from user list!");
}
+ AbstractTypeUsersLock->release();
// If we were successful removing all users from the type, 'this' will be
// deleted when the last PATypeHolder is destroyed or updated from this type.
@@ -1384,6 +1526,16 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
// destroyed.
}
+// refineAbstractTypeTo - This function is used by external callers to notify
+// us that this abstract type is equivalent to another type.
+//
+void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+ // All recursive calls will go through unlockedRefineAbstractTypeTo,
+ // to avoid deadlock problems.
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ unlockedRefineAbstractTypeTo(NewType);
+}
+
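
[Editor's note] The refineAbstractTypeTo split is the standard cure for self-deadlock on a non-recursive lock: the public entry point takes the lock exactly once, and anything that can be reached recursively calls the unlocked variant. In sketch form:

```cpp
// Sketch: public locked wrapper around an unlocked worker, so that
// recursive refinement never tries to re-acquire the lock it holds.
#include <mutex>

class Refiner {
  std::mutex Lock;

  void refineUnlocked(int Depth) {
    // ... do one step of work ...
    if (Depth > 0)
      refineUnlocked(Depth - 1);  // recursion is safe: no lock taken here
  }

public:
  void refine(int Depth) {
    std::lock_guard<std::mutex> G(Lock);  // the only acquisition point
    refineUnlocked(Depth);
  }
};
```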
// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
// the current type has transitioned from being abstract to being concrete.
//
@@ -1392,6 +1544,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
DOUT << "typeIsREFINED type: " << (void*)this << " " << *this << "\n";
#endif
+ AbstractTypeUsersLock->acquire();
unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
while (!AbstractTypeUsers.empty()) {
AbstractTypeUser *ATU = AbstractTypeUsers.back();
@@ -1400,6 +1553,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
assert(AbstractTypeUsers.size() < OldSize-- &&
"AbstractTypeUser did not remove itself from the use list!");
}
+ AbstractTypeUsersLock->release();
}
// refineAbstractType - Called when a contained type is found to be more
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index 475d71949ff5..5ae60e28d7f0 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -14,13 +14,18 @@
#include "llvm/TypeSymbolTable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Streams.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include <algorithm>
using namespace llvm;
#define DEBUG_SYMBOL_TABLE 0
#define DEBUG_ABSTYPE 0
+static ManagedStatic<sys::SmartRWMutex<true> > TypeSymbolTableLock;
+
TypeSymbolTable::~TypeSymbolTable() {
// Drop all abstract type references in the type plane...
for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
@@ -31,6 +36,9 @@ TypeSymbolTable::~TypeSymbolTable() {
std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
std::string TryName = BaseName;
+
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
const_iterator End = tmap.end();
// See if the name exists
@@ -41,16 +49,20 @@ std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
// lookup a type by name - returns null on failure
Type* TypeSymbolTable::lookup(const std::string& Name) const {
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
const_iterator TI = tmap.find(Name);
+ Type* result = 0;
if (TI != tmap.end())
- return const_cast<Type*>(TI->second);
- return 0;
+ result = const_cast<Type*>(TI->second);
+ return result;
}
// remove - Remove a type from the symbol table...
Type* TypeSymbolTable::remove(iterator Entry) {
+ TypeSymbolTableLock->writer_acquire();
+
assert(Entry != tmap.end() && "Invalid entry to remove!");
-
const Type* Result = Entry->second;
#if DEBUG_SYMBOL_TABLE
@@ -59,6 +71,8 @@ Type* TypeSymbolTable::remove(iterator Entry) {
#endif
tmap.erase(Entry);
+
+ TypeSymbolTableLock->writer_release();
// If we are removing an abstract type, remove the symbol table from its use
// list...
@@ -79,6 +93,8 @@ Type* TypeSymbolTable::remove(iterator Entry) {
void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
assert(T && "Can't insert null type into symbol table!");
+ TypeSymbolTableLock->writer_acquire();
+
if (tmap.insert(make_pair(Name, T)).second) {
// Type inserted fine with no conflict.
@@ -103,6 +119,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
// Insert the tmap entry
tmap.insert(make_pair(UniqueName, T));
}
+
+ TypeSymbolTableLock->writer_release();
// If we are adding an abstract type, add the symbol table to its use list.
if (T->isAbstract()) {
@@ -116,7 +134,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
// This function is called when one of the types in the type plane are refined
void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
-
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
// Loop over all of the types in the symbol table, replacing any references
// to OldType with references to NewType. Note that there may be multiple
// occurrences, and although we only need to remove one at a time, it's
@@ -146,6 +165,7 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
// Loop over all of the types in the symbol table, dropping any abstract
// type user entries for AbsTy which occur because there are names for the
// type.
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
AbsTy->removeAbstractTypeUser(this);
@@ -159,6 +179,7 @@ static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
void TypeSymbolTable::dump() const {
cerr << "TypeSymbolPlane: ";
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
for_each(tmap.begin(), tmap.end(), DumpTypes);
}
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 3af161fa08b9..c952b7888cdd 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -22,6 +22,8 @@
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include "llvm/ADT/DenseMap.h"
#include <algorithm>
using namespace llvm;
@@ -405,6 +407,7 @@ Value *Value::DoPHITranslation(const BasicBlock *CurBB,
/// not a value has an entry in this map.
typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
static ManagedStatic<ValueHandlesTy> ValueHandles;
+static ManagedStatic<sys::SmartRWMutex<true> > ValueHandlesLock;
/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
/// List is known to point into the existing use list.
@@ -427,9 +430,11 @@ void ValueHandleBase::AddToUseList() {
if (VP->HasValueHandle) {
// If this value already has a ValueHandle, then it must be in the
// ValueHandles map already.
+ sys::SmartScopedReader<true> Reader(&*ValueHandlesLock);
ValueHandleBase *&Entry = (*ValueHandles)[VP];
assert(Entry != 0 && "Value doesn't have any handles?");
- return AddToExistingUseList(&Entry);
+ AddToExistingUseList(&Entry);
+ return;
}
// Ok, it doesn't have any handles yet, so we must insert it into the
@@ -437,6 +442,7 @@ void ValueHandleBase::AddToUseList() {
// reallocate itself, which would invalidate all of the PrevP pointers that
// point into the old table. Handle this by checking for reallocation and
// updating the stale pointers only if needed.
+ sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
ValueHandlesTy &Handles = *ValueHandles;
const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
@@ -448,8 +454,9 @@ void ValueHandleBase::AddToUseList() {
// If reallocation didn't happen or if this was the first insertion, don't
// walk the table.
if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
- Handles.size() == 1)
+ Handles.size() == 1) {
return;
+ }
// Okay, reallocation did happen. Fix the Prev Pointers.
for (ValueHandlesTy::iterator I = Handles.begin(), E = Handles.end();
@@ -477,6 +484,7 @@ void ValueHandleBase::RemoveFromUseList() {
// If the Next pointer was null, then it is possible that this was the last
// ValueHandle watching VP. If so, delete its entry from the ValueHandles
// map.
+ sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
ValueHandlesTy &Handles = *ValueHandles;
if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
Handles.erase(VP);
@@ -490,7 +498,9 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
// Get the linked list base, which is guaranteed to exist since the
// HasValueHandle flag is set.
+ ValueHandlesLock->reader_acquire();
ValueHandleBase *Entry = (*ValueHandles)[V];
+ ValueHandlesLock->reader_release();
assert(Entry && "Value bit set but no entries exist");
while (Entry) {
@@ -528,7 +538,9 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
// Get the linked list base, which is guaranteed to exist since the
// HasValueHandle flag is set.
+ ValueHandlesLock->reader_acquire();
ValueHandleBase *Entry = (*ValueHandles)[Old];
+ ValueHandlesLock->reader_release();
assert(Entry && "Value bit set but no entries exist");
while (Entry) {
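
[Editor's note] The reallocation check in AddToUseList deserves a closer look: it saves a pointer into the DenseMap's bucket array before inserting, and only if insertion moved the buckets does it walk the table fixing stale Prev pointers. The same trick in miniature, using std::vector, whose data() pointer moves on growth just as the buckets do:

```cpp
// Sketch: detect container reallocation by comparing a saved pointer
// into its storage before and after an insertion.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Handles = {1, 2, 3};
  const int *OldBuckets = Handles.data();

  Handles.push_back(4);  // may grow and move the storage

  if (Handles.data() != OldBuckets) {
    // Storage moved: any pointers saved into the old buffer are stale
    // and must be recomputed, which is what the hunk above does for
    // the ValueHandle Prev pointers.
    std::printf("storage moved; fix up stale pointers\n");
  }
  return 0;
}
```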
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index e9f2acda28d5..10816e6248bc 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -745,8 +745,8 @@ void Verifier::visitTruncInst(TruncInst &I) {
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I);
Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I);
@@ -767,8 +767,8 @@ void Verifier::visitZExtInst(ZExtInst &I) {
Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I);
Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
"zext source and destination must both be a vector or neither", &I);
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
@@ -781,8 +781,8 @@ void Verifier::visitSExtInst(SExtInst &I) {
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I);
Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I);
@@ -798,8 +798,8 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I);
Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I);
@@ -816,8 +816,8 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I);
Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I);
diff --git a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
index a2cbcee244bf..bf27e7753538 100644
--- a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {Loop bb: backedge-taken count is 100}
; PR1533
diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
index 34d97664ccb6..95f932a9a581 100644
--- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
+++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze | grep {Loop bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)}
+; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)}
; PR1597
define i32 @f(i32 %x, i32 %y) {
diff --git a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
index c4de09375598..e5e47d549f53 100644
--- a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
+++ b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 13}
; PR1706
diff --git a/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll b/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
index 82d6ee27b457..01f338a29c27 100644
--- a/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
+++ b/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep -e {--> %b}
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep -e {--> %b}
; PR1810
define void @fun() {
diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
index 182a0876e658..b9a53b318bf5 100644
--- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze | grep {Loop header: backedge-taken count is (0 smax %n)}
+; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop header: backedge-taken count is (0 smax %n)}
define void @foo(i32 %n) {
entry:
diff --git a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
index cd3a80f79194..b943bc7d4c61 100644
--- a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze | grep {Loop loop: backedge-taken count is (100 + (-100 smax %n))}
+; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop loop: backedge-taken count is (100 + (-100 smax %n))}
; PR2002
define void @foo(i8 %n) {
diff --git a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
index 0f977f804eb8..59b51093f40a 100644
--- a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep umax
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep umax
; PR2003
define i32 @foo(i32 %n) {
diff --git a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
index 13852d45652a..5453ae3ae80b 100644
--- a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
+++ b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 61}
; PR2364
diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
index a0fcad713149..6ba0f25eb061 100644
--- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution |& not grep smax
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& not grep smax
; PR2261
@lut = common global [256 x i8] zeroinitializer, align 32 ; <[256 x i8]*> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
index 5501ee28869e..3c022e7181bd 100644
--- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution |& not grep smax
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& not grep smax
; PR2070
define i32 @a(i32 %x) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
index a3cc600053f4..5dcad53f6a60 100644
--- a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep Unpredictable
; PR2088
diff --git a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
index 9ba118c15063..54c929dcdaf6 100644
--- a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 113}
; PR2088
diff --git a/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll b/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll
index c6cb87fca140..1e9d0bfc9c23 100644
--- a/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output
; PR1827
declare void @use(i32)
diff --git a/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll b/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll
index 89e9efa37ca6..c0b3a1fe01b2 100644
--- a/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output
; PR2602
define i32 @a() nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
index 0651fd85487f..56d1fe7b541d 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution |& \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& \
; RUN: grep {Loop bb: backedge-taken count is (7 + (-1 \\* %argc))}
; XFAIL: *
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
index 78cda0e175f4..8fb1604fd190 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
@@ -1,4 +1,9 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution |& grep {/u 3}
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
+; RUN: | grep {Loop bb: Unpredictable backedge-taken count\\.}
+
+; ScalarEvolution can't compute a trip count because it doesn't know if
+; dividing by the stride will have a remainder. This could theoretically
+; be fixed by teaching it to use a more elaborate trip count computation.
define i32 @f(i32 %x) nounwind readnone {
entry:
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
index 4a76970ce19a..d506f9c3f82f 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution |& grep {/u 3}
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& grep {/u 3}
; XFAIL: *
define i32 @f(i32 %x) nounwind readnone {
diff --git a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
index 3df1fe000135..643d2f835b8c 100644
--- a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep {backedge-taken count is 255}
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {backedge-taken count is 255}
; XFAIL: *
define i32 @foo(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll b/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
index 9703bcb15422..995a1d95a8a8 100644
--- a/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep {0 smax}
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {0 smax}
; XFAIL: *
define i32 @f(i32 %c.idx.val) {
diff --git a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
index 4d4fcd783edb..8e064c70da9a 100644
--- a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution |& \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& \
; RUN: grep {(((-1 \\* %i0) + (100005 smax %i0)) /u 5)}
; XFAIL: *
diff --git a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
index 2557c8bbbab2..950c1d21d910 100644
--- a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution |& grep {/u 5}
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& grep {/u 5}
; XFAIL: *
define i8 @foo0(i8 %i0) nounwind {
diff --git a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
index 07714cd1b507..65c4cdbb1362 100644
--- a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | not grep {/u -1}
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | not grep {/u -1}
; PR3275
@g_16 = external global i16 ; <i16*> [#uses=3]
diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
index c23f0f0ef1d8..6aced23cf950 100644
--- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
+++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep {(trunc i} | not grep ext
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {(trunc i} | not grep ext
define i16 @test1(i8 %x) {
%A = sext i8 %x to i32
diff --git a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
index bea46a765d65..5e5128bd577a 100644
--- a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
+++ b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep {count is 2}
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {count is 2}
; PR3171
%struct.Foo = type { i32 }
diff --git a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
index 76ca4a084747..27a546f32e82 100644
--- a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
+++ b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 100}
; PR1101
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
index df39cc0af084..ce7ee7791d58 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -scalar-evolution -analyze | grep {Loop bb3: backedge-taken count is (-1 + %n)}
+; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop bb3: backedge-taken count is (-1 + %n)}
; We don't want to use a max in the trip count expression in
; this testcase.
diff --git a/test/Analysis/ScalarEvolution/do-loop.ll b/test/Analysis/ScalarEvolution/do-loop.ll
index c6b3298638b1..85c38e4f1c5a 100644
--- a/test/Analysis/ScalarEvolution/do-loop.ll
+++ b/test/Analysis/ScalarEvolution/do-loop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep smax
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep smax
; PR1614
define i32 @f(i32 %x, i32 %y) {
diff --git a/test/Analysis/ScalarEvolution/smax.ll b/test/Analysis/ScalarEvolution/smax.ll
index 4818ee97f250..366dfdee7146 100644
--- a/test/Analysis/ScalarEvolution/smax.ll
+++ b/test/Analysis/ScalarEvolution/smax.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep smax | count 2
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | grep \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep smax | count 2
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep \
; RUN: {%. smax %. smax %.}
; PR1614
diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll
index f8894d6428ff..c5be858d1ee9 100644
--- a/test/Analysis/ScalarEvolution/trip-count.ll
+++ b/test/Analysis/ScalarEvolution/trip-count.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 10000}
; PR1101
diff --git a/test/Analysis/ScalarEvolution/trip-count2.ll b/test/Analysis/ScalarEvolution/trip-count2.ll
index 8615c76db69c..374a5621cebf 100644
--- a/test/Analysis/ScalarEvolution/trip-count2.ll
+++ b/test/Analysis/ScalarEvolution/trip-count2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -analyze -scalar-evolution | \
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | \
; RUN: grep {backedge-taken count is 4}
; PR1101
diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
index a95138f0ef47..35c86835f3ac 100644
--- a/test/Analysis/ScalarEvolution/trip-count3.ll
+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
@@ -1,5 +1,9 @@
; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \
-; RUN: | grep {backedge-taken count is ((64 + (-64 smax (-1 + (-1 \\* %0))) + %0) /u 64)}
+; RUN: | grep {Loop bb3\\.i: Unpredictable backedge-taken count\\.}
+
+; ScalarEvolution can't compute a trip count because it doesn't know if
+; dividing by the stride will have a remainder. This could theoretically
+; be fixed by teaching it to use a more elaborate trip count computation.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Analysis/ScalarEvolution/trip-count5.ll b/test/Analysis/ScalarEvolution/trip-count5.ll
new file mode 100644
index 000000000000..822dc2638f5a
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count5.ll
@@ -0,0 +1,48 @@
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output > %t
+; RUN: grep sext %t | count 2
+; RUN: not grep {(sext} %t
+
+; ScalarEvolution should be able to compute a maximum trip count
+; value sufficient to fold away both sext casts.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define float @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bim) nounwind {
+entry:
+ %tmp3 = load float* %peakWeight, align 4 ; <float> [#uses=2]
+ %tmp2538 = icmp sgt i32 %bim, 0 ; <i1> [#uses=1]
+ br i1 %tmp2538, label %bb.nph, label %bb4
+
+bb.nph: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb1, %bb.nph
+ %distERBhi.036 = phi float [ %tmp10, %bb1 ], [ 0.000000e+00, %bb.nph ] ; <float> [#uses=1]
+ %hiPart.035 = phi i32 [ %tmp12, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=2]
+ %peakCount.034 = phi float [ %tmp19, %bb1 ], [ %tmp3, %bb.nph ] ; <float> [#uses=1]
+ %tmp6 = sext i32 %hiPart.035 to i64 ; <i64> [#uses=1]
+ %tmp7 = getelementptr float* %pTmp1, i64 %tmp6 ; <float*> [#uses=1]
+ %tmp8 = load float* %tmp7, align 4 ; <float> [#uses=1]
+ %tmp10 = fadd float %tmp8, %distERBhi.036 ; <float> [#uses=3]
+ %tmp12 = add i32 %hiPart.035, 1 ; <i32> [#uses=3]
+ %tmp15 = sext i32 %tmp12 to i64 ; <i64> [#uses=1]
+ %tmp16 = getelementptr float* %peakWeight, i64 %tmp15 ; <float*> [#uses=1]
+ %tmp17 = load float* %tmp16, align 4 ; <float> [#uses=1]
+ %tmp19 = fadd float %tmp17, %peakCount.034 ; <float> [#uses=2]
+ br label %bb1
+
+bb1: ; preds = %bb
+ %tmp21 = fcmp olt float %tmp10, 2.500000e+00 ; <i1> [#uses=1]
+ %tmp25 = icmp slt i32 %tmp12, %bim ; <i1> [#uses=1]
+ %tmp27 = and i1 %tmp21, %tmp25 ; <i1> [#uses=1]
+ br i1 %tmp27, label %bb, label %bb1.bb4_crit_edge
+
+bb1.bb4_crit_edge: ; preds = %bb1
+ br label %bb4
+
+bb4: ; preds = %bb1.bb4_crit_edge, %entry
+ %distERBhi.0.lcssa = phi float [ %tmp10, %bb1.bb4_crit_edge ], [ 0.000000e+00, %entry ] ; <float> [#uses=1]
+ %peakCount.0.lcssa = phi float [ %tmp19, %bb1.bb4_crit_edge ], [ %tmp3, %entry ] ; <float> [#uses=1]
+ %tmp31 = fdiv float %peakCount.0.lcssa, %distERBhi.0.lcssa ; <float> [#uses=1]
+ ret float %tmp31
+}
diff --git a/test/Analysis/ScalarEvolution/xor-and.ll b/test/Analysis/ScalarEvolution/xor-and.ll
new file mode 100644
index 000000000000..843052456a80
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/xor-and.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | opt -scalar-evolution -disable-output -analyze \
+; RUN: | grep {\\--> (zext i4 (-8 + (trunc i64 (8 \\* %x) to i4)) to i64)}
+
+; ScalarEvolution shouldn't try to analyze %z into something like
+; --> (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64)
+
+define i64 @foo(i64 %x) {
+ %a = shl i64 %x, 3
+ %t = and i64 %a, 8
+ %z = xor i64 %t, 8
+ ret i64 %z
+}
diff --git a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
new file mode 100644
index 000000000000..c715a189287a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
@@ -0,0 +1,344 @@
+; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+
+ %struct.term = type { i32, i32, i32 }
+
+declare fastcc i8* @memory_Malloc(i32) nounwind
+
+define fastcc %struct.term* @t1() nounwind {
+entry:
+ br i1 undef, label %bb, label %bb1
+
+bb: ; preds = %entry
+ ret %struct.term* undef
+
+bb1: ; preds = %entry
+ %0 = tail call fastcc i8* @memory_Malloc(i32 12) nounwind ; <i8*> [#uses=0]
+ %1 = tail call fastcc i8* @memory_Malloc(i32 12) nounwind ; <i8*> [#uses=0]
+ ret %struct.term* undef
+}
+
+
+define i32 @t2(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+ br label %bb6.i8
+
+bb6.i8: ; preds = %memory_CalculateRealBlockSize1374.exit.i, %entry
+ br i1 undef, label %memory_CalculateRealBlockSize1374.exit.i, label %bb.i.i9
+
+bb.i.i9: ; preds = %bb6.i8
+ br label %memory_CalculateRealBlockSize1374.exit.i
+
+memory_CalculateRealBlockSize1374.exit.i: ; preds = %bb.i.i9, %bb6.i8
+ %0 = phi i32 [ undef, %bb.i.i9 ], [ undef, %bb6.i8 ] ; <i32> [#uses=2]
+ store i32 %0, i32* undef, align 4
+ %1 = urem i32 8184, %0 ; <i32> [#uses=1]
+ %2 = sub i32 8188, %1 ; <i32> [#uses=1]
+ store i32 %2, i32* undef, align 4
+ br i1 undef, label %memory_Init.exit, label %bb6.i8
+
+memory_Init.exit: ; preds = %memory_CalculateRealBlockSize1374.exit.i
+ br label %bb.i.i
+
+bb.i.i: ; preds = %bb.i.i, %memory_Init.exit
+ br i1 undef, label %symbol_Init.exit, label %bb.i.i
+
+symbol_Init.exit: ; preds = %bb.i.i
+ br label %bb.i.i67
+
+bb.i.i67: ; preds = %bb.i.i67, %symbol_Init.exit
+ br i1 undef, label %symbol_CreatePrecedence3522.exit, label %bb.i.i67
+
+symbol_CreatePrecedence3522.exit: ; preds = %bb.i.i67
+ br label %bb.i.i8.i
+
+bb.i.i8.i: ; preds = %bb.i.i8.i, %symbol_CreatePrecedence3522.exit
+ br i1 undef, label %cont_Create.exit9.i, label %bb.i.i8.i
+
+cont_Create.exit9.i: ; preds = %bb.i.i8.i
+ br label %bb.i.i.i72
+
+bb.i.i.i72: ; preds = %bb.i.i.i72, %cont_Create.exit9.i
+ br i1 undef, label %cont_Init.exit, label %bb.i.i.i72
+
+cont_Init.exit: ; preds = %bb.i.i.i72
+ br label %bb.i103
+
+bb.i103: ; preds = %bb.i103, %cont_Init.exit
+ br i1 undef, label %subs_Init.exit, label %bb.i103
+
+subs_Init.exit: ; preds = %bb.i103
+ br i1 undef, label %bb1.i.i.i80, label %cc_Init.exit
+
+bb1.i.i.i80: ; preds = %subs_Init.exit
+ unreachable
+
+cc_Init.exit: ; preds = %subs_Init.exit
+ br label %bb.i.i375
+
+bb.i.i375: ; preds = %bb.i.i375, %cc_Init.exit
+ br i1 undef, label %bb.i439, label %bb.i.i375
+
+bb.i439: ; preds = %bb.i439, %bb.i.i375
+ br i1 undef, label %opts_DeclareSPASSFlagsAsOptions.exit, label %bb.i439
+
+opts_DeclareSPASSFlagsAsOptions.exit: ; preds = %bb.i439
+ br i1 undef, label %opts_TranslateShortOptDeclarations.exit.i, label %bb.i.i82
+
+bb.i.i82: ; preds = %opts_DeclareSPASSFlagsAsOptions.exit
+ unreachable
+
+opts_TranslateShortOptDeclarations.exit.i: ; preds = %opts_DeclareSPASSFlagsAsOptions.exit
+ br i1 undef, label %list_Length.exit.i.thread.i, label %bb.i.i4.i
+
+list_Length.exit.i.thread.i: ; preds = %opts_TranslateShortOptDeclarations.exit.i
+ br i1 undef, label %bb18.i.i.i, label %bb26.i.i.i
+
+bb.i.i4.i: ; preds = %opts_TranslateShortOptDeclarations.exit.i
+ unreachable
+
+bb18.i.i.i: ; preds = %list_Length.exit.i.thread.i
+ unreachable
+
+bb26.i.i.i: ; preds = %list_Length.exit.i.thread.i
+ br i1 undef, label %bb27.i142, label %opts_GetOptLongOnly.exit.thread97.i
+
+opts_GetOptLongOnly.exit.thread97.i: ; preds = %bb26.i.i.i
+ br label %bb27.i142
+
+bb27.i142: ; preds = %opts_GetOptLongOnly.exit.thread97.i, %bb26.i.i.i
+ br label %bb1.i3.i
+
+bb1.i3.i: ; preds = %bb1.i3.i, %bb27.i142
+ br i1 undef, label %opts_FreeLongOptsArray.exit.i, label %bb1.i3.i
+
+opts_FreeLongOptsArray.exit.i: ; preds = %bb1.i3.i
+ br label %bb.i443
+
+bb.i443: ; preds = %bb.i443, %opts_FreeLongOptsArray.exit.i
+ br i1 undef, label %flag_InitStoreByDefaults3542.exit, label %bb.i443
+
+flag_InitStoreByDefaults3542.exit: ; preds = %bb.i443
+ br i1 undef, label %bb6.i449, label %bb.i503
+
+bb6.i449: ; preds = %flag_InitStoreByDefaults3542.exit
+ unreachable
+
+bb.i503: ; preds = %bb.i503, %flag_InitStoreByDefaults3542.exit
+ br i1 undef, label %flag_CleanStore3464.exit, label %bb.i503
+
+flag_CleanStore3464.exit: ; preds = %bb.i503
+ br i1 undef, label %bb1.i81.i.preheader, label %bb.i173
+
+bb.i173: ; preds = %flag_CleanStore3464.exit
+ unreachable
+
+bb1.i81.i.preheader: ; preds = %flag_CleanStore3464.exit
+ br i1 undef, label %bb1.i64.i.preheader, label %bb5.i179
+
+bb5.i179: ; preds = %bb1.i81.i.preheader
+ unreachable
+
+bb1.i64.i.preheader: ; preds = %bb1.i81.i.preheader
+ br i1 undef, label %dfg_DeleteProofList.exit.i, label %bb.i9.i
+
+bb.i9.i: ; preds = %bb1.i64.i.preheader
+ unreachable
+
+dfg_DeleteProofList.exit.i: ; preds = %bb1.i64.i.preheader
+ br i1 undef, label %term_DeleteTermList621.exit.i, label %bb.i.i62.i
+
+bb.i.i62.i: ; preds = %bb.i.i62.i, %dfg_DeleteProofList.exit.i
+ br i1 undef, label %term_DeleteTermList621.exit.i, label %bb.i.i62.i
+
+term_DeleteTermList621.exit.i: ; preds = %bb.i.i62.i, %dfg_DeleteProofList.exit.i
+ br i1 undef, label %dfg_DFGParser.exit, label %bb.i.i211
+
+bb.i.i211: ; preds = %term_DeleteTermList621.exit.i
+ unreachable
+
+dfg_DFGParser.exit: ; preds = %term_DeleteTermList621.exit.i
+ br label %bb.i513
+
+bb.i513: ; preds = %bb2.i516, %dfg_DFGParser.exit
+ br i1 undef, label %bb2.i516, label %bb1.i514
+
+bb1.i514: ; preds = %bb.i513
+ unreachable
+
+bb2.i516: ; preds = %bb.i513
+ br i1 undef, label %bb.i509, label %bb.i513
+
+bb.i509: ; preds = %bb.i509, %bb2.i516
+ br i1 undef, label %symbol_TransferPrecedence3468.exit511, label %bb.i509
+
+symbol_TransferPrecedence3468.exit511: ; preds = %bb.i509
+ br i1 undef, label %bb20, label %bb21
+
+bb20: ; preds = %symbol_TransferPrecedence3468.exit511
+ unreachable
+
+bb21: ; preds = %symbol_TransferPrecedence3468.exit511
+ br i1 undef, label %cnf_Init.exit, label %bb.i498
+
+bb.i498: ; preds = %bb21
+ unreachable
+
+cnf_Init.exit: ; preds = %bb21
+ br i1 undef, label %bb23, label %bb22
+
+bb22: ; preds = %cnf_Init.exit
+ br i1 undef, label %bb2.i.i496, label %bb.i.i494
+
+bb.i.i494: ; preds = %bb22
+ unreachable
+
+bb2.i.i496: ; preds = %bb22
+ unreachable
+
+bb23: ; preds = %cnf_Init.exit
+ br i1 undef, label %bb28, label %bb24
+
+bb24: ; preds = %bb23
+ unreachable
+
+bb28: ; preds = %bb23
+ br i1 undef, label %bb31, label %bb29
+
+bb29: ; preds = %bb28
+ unreachable
+
+bb31: ; preds = %bb28
+ br i1 undef, label %bb34, label %bb32
+
+bb32: ; preds = %bb31
+ unreachable
+
+bb34: ; preds = %bb31
+ br i1 undef, label %bb83, label %bb66
+
+bb66: ; preds = %bb34
+ unreachable
+
+bb83: ; preds = %bb34
+ br i1 undef, label %bb2.i1668, label %bb.i1667
+
+bb.i1667: ; preds = %bb83
+ unreachable
+
+bb2.i1668: ; preds = %bb83
+ br i1 undef, label %bb5.i205, label %bb3.i204
+
+bb3.i204: ; preds = %bb2.i1668
+ unreachable
+
+bb5.i205: ; preds = %bb2.i1668
+ br i1 undef, label %bb.i206.i, label %ana_AnalyzeSortStructure.exit.i
+
+bb.i206.i: ; preds = %bb5.i205
+ br i1 undef, label %bb1.i207.i, label %ana_AnalyzeSortStructure.exit.i
+
+bb1.i207.i: ; preds = %bb.i206.i
+ br i1 undef, label %bb25.i1801.thread, label %bb.i1688
+
+bb.i1688: ; preds = %bb1.i207.i
+ unreachable
+
+bb25.i1801.thread: ; preds = %bb1.i207.i
+ unreachable
+
+ana_AnalyzeSortStructure.exit.i: ; preds = %bb.i206.i, %bb5.i205
+ br i1 undef, label %bb7.i207, label %bb.i1806
+
+bb.i1806: ; preds = %ana_AnalyzeSortStructure.exit.i
+ br i1 undef, label %bb2.i.i.i1811, label %bb.i.i.i1809
+
+bb.i.i.i1809: ; preds = %bb.i1806
+ unreachable
+
+bb2.i.i.i1811: ; preds = %bb.i1806
+ unreachable
+
+bb7.i207: ; preds = %ana_AnalyzeSortStructure.exit.i
+ br i1 undef, label %bb9.i, label %bb8.i
+
+bb8.i: ; preds = %bb7.i207
+ unreachable
+
+bb9.i: ; preds = %bb7.i207
+ br i1 undef, label %bb23.i, label %bb26.i
+
+bb23.i: ; preds = %bb9.i
+ br i1 undef, label %bb25.i, label %bb24.i
+
+bb24.i: ; preds = %bb23.i
+ br i1 undef, label %sort_SortTheoryIsTrivial.exit.i, label %bb.i2093
+
+bb.i2093: ; preds = %bb.i2093, %bb24.i
+ br label %bb.i2093
+
+sort_SortTheoryIsTrivial.exit.i: ; preds = %bb24.i
+ br i1 undef, label %bb3.i2141, label %bb4.i2143
+
+bb3.i2141: ; preds = %sort_SortTheoryIsTrivial.exit.i
+ unreachable
+
+bb4.i2143: ; preds = %sort_SortTheoryIsTrivial.exit.i
+ br i1 undef, label %bb8.i2178, label %bb5.i2144
+
+bb5.i2144: ; preds = %bb4.i2143
+ br i1 undef, label %bb7.i2177, label %bb1.i28.i
+
+bb1.i28.i: ; preds = %bb5.i2144
+ br i1 undef, label %bb4.i43.i, label %bb2.i.i2153
+
+bb2.i.i2153: ; preds = %bb1.i28.i
+ br i1 undef, label %bb4.i.i33.i, label %bb.i.i30.i
+
+bb.i.i30.i: ; preds = %bb2.i.i2153
+ unreachable
+
+bb4.i.i33.i: ; preds = %bb2.i.i2153
+ br i1 undef, label %bb9.i.i36.i, label %bb5.i.i34.i
+
+bb5.i.i34.i: ; preds = %bb4.i.i33.i
+ unreachable
+
+bb9.i.i36.i: ; preds = %bb4.i.i33.i
+ br i1 undef, label %bb14.i.i.i2163, label %bb10.i.i37.i
+
+bb10.i.i37.i: ; preds = %bb9.i.i36.i
+ unreachable
+
+bb14.i.i.i2163: ; preds = %bb9.i.i36.i
+ br i1 undef, label %sort_LinkPrint.exit.i.i, label %bb15.i.i.i2164
+
+bb15.i.i.i2164: ; preds = %bb14.i.i.i2163
+ unreachable
+
+sort_LinkPrint.exit.i.i: ; preds = %bb14.i.i.i2163
+ unreachable
+
+bb4.i43.i: ; preds = %bb1.i28.i
+ unreachable
+
+bb7.i2177: ; preds = %bb5.i2144
+ unreachable
+
+bb8.i2178: ; preds = %bb4.i2143
+ br i1 undef, label %sort_ApproxStaticSortTheory.exit, label %bb.i5.i2185.preheader
+
+bb.i5.i2185.preheader: ; preds = %bb8.i2178
+ br label %bb.i5.i2185
+
+bb.i5.i2185: ; preds = %bb.i5.i2185, %bb.i5.i2185.preheader
+ br i1 undef, label %sort_ApproxStaticSortTheory.exit, label %bb.i5.i2185
+
+sort_ApproxStaticSortTheory.exit: ; preds = %bb.i5.i2185, %bb8.i2178
+ br label %bb25.i
+
+bb25.i: ; preds = %sort_ApproxStaticSortTheory.exit, %bb23.i
+ unreachable
+
+bb26.i: ; preds = %bb9.i
+ unreachable
+}
diff --git a/test/CodeGen/ARM/2009-06-18-ThumbCommuteMul.ll b/test/CodeGen/ARM/2009-06-18-ThumbCommuteMul.ll
new file mode 100644
index 000000000000..9b2aba94ec8d
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-18-ThumbCommuteMul.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep r0 | count 1
+
+define i32 @a(i32 %x, i32 %y) nounwind readnone {
+entry:
+ %mul = mul i32 %y, %x ; <i32> [#uses=1]
+ ret i32 %mul
+}
+
diff --git a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
new file mode 100644
index 000000000000..cbe2385ab27a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llc -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
+; PR4419
+
+define float @__ieee754_acosf(float %x) nounwind {
+entry:
+ br i1 undef, label %bb, label %bb4
+
+bb: ; preds = %entry
+ ret float undef
+
+bb4: ; preds = %entry
+ br i1 undef, label %bb5, label %bb6
+
+bb5: ; preds = %bb4
+ ret float undef
+
+bb6: ; preds = %bb4
+ br i1 undef, label %bb11, label %bb12
+
+bb11: ; preds = %bb6
+ %0 = tail call float @__ieee754_sqrtf(float undef) nounwind ; <float> [#uses=1]
+ %1 = fmul float %0, -2.000000e+00 ; <float> [#uses=1]
+ %2 = fadd float %1, 0x400921FB40000000 ; <float> [#uses=1]
+ ret float %2
+
+bb12: ; preds = %bb6
+ ret float undef
+}
+
+declare float @__ieee754_sqrtf(float)
diff --git a/test/CodeGen/ARM/ifcvt9.ll b/test/CodeGen/ARM/ifcvt9.ll
new file mode 100644
index 000000000000..bbd2f2ed6213
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt9.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=arm
+
+define fastcc void @t() nounwind {
+entry:
+ br i1 undef, label %bb.i.i3, label %growMapping.exit
+
+bb.i.i3: ; preds = %entry
+ unreachable
+
+growMapping.exit: ; preds = %entry
+ unreachable
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
new file mode 100644
index 000000000000..f1bee058a0fc
--- /dev/null
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | grep ldrd
+; RUN: llvm-as < %s | llc -mtriple=armv5-apple-darwin | not grep ldrd
+; RUN: llvm-as < %s | llc -mtriple=armv6-eabi | not grep ldrd
+; rdar://6949835
+
+@b = external global i64*
+
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+ %0 = load i64** @b, align 4
+ %1 = load i64* %0, align 4
+ %2 = mul i64 %1, %a
+ ret i64 %2
+}
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index 585645b02149..ed5e4c5f5943 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 -arm-pre-alloc-loadstore-opti | grep stm | count 2
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1]
@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/thumb2-add.ll b/test/CodeGen/ARM/thumb2-add.ll
new file mode 100644
index 000000000000..d4f408ff76e7
--- /dev/null
+++ b/test/CodeGen/ARM/thumb2-add.ll
@@ -0,0 +1,50 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #255
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #256
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #257
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4094
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4095
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep #4096
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
+
+define i32 @t2ADDrc_255(i32 %lhs) {
+ %Rd = add i32 %lhs, 255;
+ ret i32 %Rd
+}
+
+define i32 @t2ADDrc_256(i32 %lhs) {
+ %Rd = add i32 %lhs, 256;
+ ret i32 %Rd
+}
+
+define i32 @t2ADDrc_257(i32 %lhs) {
+ %Rd = add i32 %lhs, 257;
+ ret i32 %Rd
+}
+
+define i32 @t2ADDrc_4094(i32 %lhs) {
+ %Rd = add i32 %lhs, 4094;
+ ret i32 %Rd
+}
+
+define i32 @t2ADDrc_4095(i32 %lhs) {
+ %Rd = add i32 %lhs, 4095;
+ ret i32 %Rd
+}
+
+define i32 @t2ADDrc_4096(i32 %lhs) {
+ %Rd = add i32 %lhs, 4096;
+ ret i32 %Rd
+}
+
+define i32 @t2ADDrr(i32 %lhs, i32 %rhs) {
+ %Rd = add i32 %lhs, %rhs;
+ ret i32 %Rd
+}
+
+define i32 @t2ADDrs(i32 %lhs, i32 %rhs) {
+ %tmp = shl i32 %rhs, 8
+ %Rd = add i32 %lhs, %tmp;
+ ret i32 %Rd
+}
+
diff --git a/test/CodeGen/ARM/thumb2-mov.ll b/test/CodeGen/ARM/thumb2-mov.ll
new file mode 100644
index 000000000000..0c4c59689b60
--- /dev/null
+++ b/test/CodeGen/ARM/thumb2-mov.ll
@@ -0,0 +1,127 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #11206827
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #2868947712
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #2880154539
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #251658240
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #3948544
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #258
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep #4026531840
+
+; Test #<const>
+
+; var 2.1 - 0x00ab00ab
+define i32 @t2_const_var2_1_ok_1(i32 %lhs) {
+ %ret = add i32 %lhs, 11206827 ; 0x00ab00ab
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_1_fail_1(i32 %lhs) {
+ %ret = add i32 %lhs, 11206843 ; 0x00ab00bb
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_1_fail_2(i32 %lhs) {
+ %ret = add i32 %lhs, 27984043 ; 0x01ab00ab
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_1_fail_3(i32 %lhs) {
+ %ret = add i32 %lhs, 27984299 ; 0x01ab01ab
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_1_fail_4(i32 %lhs) {
+ %ret = add i32 %lhs, 28027649 ; 0x01abab01
+ ret i32 %ret
+}
+
+; var 2.2 - 0xab00ab00
+define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
+ %ret = add i32 %lhs, 2868947712 ; 0xab00ab00
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_2_fail_1(i32 %lhs) {
+ %ret = add i32 %lhs, 2868951552 ; 0xab00ba00
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_2_fail_2(i32 %lhs) {
+ %ret = add i32 %lhs, 2868947728 ; 0xab00ab10
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_2_fail_3(i32 %lhs) {
+ %ret = add i32 %lhs, 2869996304 ; 0xab10ab10
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_2_fail_4(i32 %lhs) {
+ %ret = add i32 %lhs, 279685904 ; 0x10abab10
+ ret i32 %ret
+}
+
+; var 2.3 - 0xabababab
+define i32 @t2_const_var2_3_ok_1(i32 %lhs) {
+ %ret = add i32 %lhs, 2880154539 ; 0xabababab
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_3_fail_1(i32 %lhs) {
+ %ret = add i32 %lhs, 2880154554 ; 0xabababba
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_3_fail_2(i32 %lhs) {
+ %ret = add i32 %lhs, 2880158379 ; 0xababbaab
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_3_fail_3(i32 %lhs) {
+ %ret = add i32 %lhs, 2881137579 ; 0xabbaabab
+ ret i32 %ret
+}
+
+define i32 @t2_const_var2_3_fail_4(i32 %lhs) {
+ %ret = add i32 %lhs, 3131812779 ; 0xbaababab
+ ret i32 %ret
+}
+
+; var 3 - 0x0F000000
+define i32 @t2_const_var3_1_ok_1(i32 %lhs) {
+ %ret = add i32 %lhs, 251658240 ; 0x0F000000
+ ret i32 %ret
+}
+
+define i32 @t2_const_var3_2_ok_1(i32 %lhs) {
+ %ret = add i32 %lhs, 3948544 ; 0b00000000001111000100000000000000
+ ret i32 %ret
+}
+
+define i32 @t2_const_var3_2_fail_1(i32 %lhs) {
+ %ret = add i32 %lhs, 3940352 ; 0b00000000001111000010000000000000
+ ret i32 %ret
+}
+
+define i32 @t2_const_var3_3_ok_1(i32 %lhs) {
+ %ret = add i32 %lhs, 258 ; 0b00000000000000000000000100000010
+ ret i32 %ret
+}
+
+define i32 @t2_const_var3_4_ok_1(i32 %lhs) {
+ %ret = add i32 %lhs, 4026531840 ; 0xF0000000
+ ret i32 %ret
+}
+
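For context (not part of the commit): the variants above track the Thumb-2 modified-immediate forms -- roughly, an 8-bit value XY used as-is, replicated as 0x00XY00XY, 0xXY00XY00, or 0xXYXYXYXY, or rotated into position; anything else is expected to need a movw/movt pair. A minimal sketch with a hypothetical function name:

define i32 @t2_const_needs_movw_movt(i32 %lhs) {
  ; 0x00ab00bb breaks the 0x00XY00XY replication (ab != bb) and spans more
  ; than an 8-bit window, so the expected materialization is a
  ; movw #0x00bb / movt #0x00ab pair before the add.
  %ret = add i32 %lhs, 11206843 ; 0x00ab00bb
  ret i32 %ret
}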
diff --git a/test/CodeGen/ARM/thumb2-mov2.ll b/test/CodeGen/ARM/thumb2-mov2.ll
new file mode 100644
index 000000000000..d2f8c0b91a58
--- /dev/null
+++ b/test/CodeGen/ARM/thumb2-mov2.ll
@@ -0,0 +1,65 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep movt | grep #1234
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov | grep movt
+
+define i32 @t2MOVTi16_ok_1(i32 %a) {
+ %1 = and i32 %a, 65535
+ %2 = shl i32 1234, 16
+ %3 = or i32 %1, %2
+
+ ret i32 %3
+}
+
+define i32 @t2MOVTi16_test_1(i32 %a) {
+ %1 = shl i32 255, 8
+ %2 = shl i32 1234, 8
+ %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
+ %4 = shl i32 %2, 8 ; This gives us (1234 << 16) in %4
+ %5 = and i32 %a, %3
+ %6 = or i32 %4, %5
+
+ ret i32 %6
+}
+
+define i32 @t2MOVTi16_test_2(i32 %a) {
+ %1 = shl i32 255, 8
+ %2 = shl i32 1234, 8
+ %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
+ %4 = shl i32 %2, 6
+ %5 = and i32 %a, %3
+ %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6
+ %7 = or i32 %5, %6
+
+ ret i32 %7
+}
+
+define i32 @t2MOVTi16_test_3(i32 %a) {
+ %1 = shl i32 255, 8
+ %2 = shl i32 1234, 8
+ %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
+ %4 = shl i32 %2, 6
+ %5 = and i32 %a, %3
+ %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6
+ %7 = lshr i32 %6, 6
+ %8 = shl i32 %7, 6
+ %9 = or i32 %5, %8
+
+ ret i32 %9
+}
+
+define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
+ %1 = shl i32 255, 8
+ %2 = shl i32 1234, 8
+ %3 = or i32 %1, 255 ; This gives us 0xFFFF in %3
+ %4 = shl i32 %2, 6
+ %5 = and i32 %a, %3
+ %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6
+ %7 = lshr i32 %6, 3
+ %8 = or i32 %5, %7
+
+ ret i32 %8
+}
+
+
diff --git a/test/CodeGen/ARM/thumb2-shifter.ll b/test/CodeGen/ARM/thumb2-shifter.ll
new file mode 100644
index 000000000000..f9ec5067ec01
--- /dev/null
+++ b/test/CodeGen/ARM/thumb2-shifter.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsl
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep asr
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ror
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov
+
+define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
+ %A = shl i32 %Y, 16
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
+ %A = lshr i32 %Y, 16
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
+ %A = ashr i32 %Y, 16
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+; i32 ror(x, n) = (x >> n) | (x << (32 - n))
+define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
+ %A = lshr i32 %Y, 16
+ %B = shl i32 %Y, 16
+ %C = or i32 %B, %A
+ %R = add i32 %X, %C
+ ret i32 %R
+}
+
+define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = shl i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
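The identity above is tested only with the fixed amount n = 16; it holds for a variable amount as well. A minimal sketch (hypothetical function, not part of the test), assuming 1 <= n <= 31 so neither shift is by 32:

define i32 @ror_var(i32 %x, i32 %n) {
  ; ror(x, n) = (x >> n) | (x << (32 - n)), valid for n in [1, 31]
  %lo = lshr i32 %x, %n
  %amt = sub i32 32, %n
  %hi = shl i32 %x, %amt
  %r = or i32 %hi, %lo
  ret i32 %r
}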
diff --git a/test/CodeGen/ARM/vargs2.ll b/test/CodeGen/ARM/vargs2.ll
index fb0b8d8ef7cb..5cc86a95ba09 100644
--- a/test/CodeGen/ARM/vargs2.ll
+++ b/test/CodeGen/ARM/vargs2.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -march=thumb | \
-; RUN: grep pop | count 2
+; RUN: llvm-as < %s | llc -mtriple=arm-linux -march=thumb | grep pop | count 1
+; RUN: llvm-as < %s | llc -mtriple=arm-darwin -march=thumb | grep pop | count 2
@str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/CellSPU/mul-with-overflow.ll b/test/CodeGen/CellSPU/mul-with-overflow.ll
new file mode 100644
index 000000000000..755b99be9cdd
--- /dev/null
+++ b/test/CodeGen/CellSPU/mul-with-overflow.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -march=cellspu
+
+declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
+define i1 @a(i16 %x) zeroext nounwind {
+ %res = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %x, i16 3)
+ %obil = extractvalue {i16, i1} %res, 1
+ ret i1 %obil
+}
+
+declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
+define i1 @b(i16 %x) zeroext nounwind {
+ %res = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %x, i16 3)
+ %obil = extractvalue {i16, i1} %res, 1
+ ret i1 %obil
+}
diff --git a/test/CodeGen/PowerPC/mul-with-overflow.ll b/test/CodeGen/PowerPC/mul-with-overflow.ll
new file mode 100644
index 000000000000..0276846d7cbb
--- /dev/null
+++ b/test/CodeGen/PowerPC/mul-with-overflow.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -march=ppc32
+
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+define i1 @a(i32 %x) zeroext nounwind {
+ %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
+ %obil = extractvalue {i32, i1} %res, 1
+ ret i1 %obil
+}
+
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+define i1 @b(i32 %x) zeroext nounwind {
+ %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %x, i32 3)
+ %obil = extractvalue {i32, i1} %res, 1
+ ret i1 %obil
+}
diff --git a/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
new file mode 100644
index 000000000000..095e6a103621
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86 -tailcallopt | not grep TAILCALL
+
+; Bug 4396. This tail call must NOT be optimized: %tmp6 is used after the call.
+
+declare fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv() nounwind
+
+define fastcc i8* @_D3gcx2GC12callocNoSyncMFmkZPv() nounwind {
+entry:
+ %tmp6 = tail call fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv() ; <i8*> [#uses=2]
+ %tmp9 = tail call i8* @memset(i8* %tmp6, i32 0, i64 2) ; <i8*> [#uses=0]
+ ret i8* %tmp6
+}
+
+declare i8* @memset(i8*, i32, i64)
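For contrast, a call whose result is returned immediately and never otherwise used sits in true tail position. A minimal sketch (hypothetical function, reusing the declare above; not part of the test):

define fastcc i8* @_forwardNoSync() nounwind {
entry:
  ; Nothing reads %tmp after the call, so under -tailcallopt this fastcc
  ; call is eligible to become a real tail call.
  %tmp = tail call fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv()
  ret i8* %tmp
}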
diff --git a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
new file mode 100644
index 000000000000..d6ff5b6803e3
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+; PR2484
+
+define <4 x float> @f4523(<4 x float> %a,<4 x float> %b) nounwind {
+entry:
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b,
+             <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x float> %shuffle
+}
diff --git a/test/CodeGen/X86/fmul-zero.ll b/test/CodeGen/X86/fmul-zero.ll
index 8f705a4d242f..73aa713de52e 100644
--- a/test/CodeGen/X86/fmul-zero.ll
+++ b/test/CodeGen/X86/fmul-zero.ll
@@ -3,7 +3,7 @@
define void @test14(<4 x float>*) nounwind {
load <4 x float>* %0, align 1
- mul <4 x float> %2, zeroinitializer
+ fmul <4 x float> %2, zeroinitializer
store <4 x float> %3, <4 x float>* %0, align 1
ret void
}
diff --git a/test/CodeGen/X86/inline-asm-fpstack2.ll b/test/CodeGen/X86/inline-asm-fpstack2.ll
new file mode 100644
index 000000000000..968561826b5c
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-fpstack2.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: grep {fld %%st(0)} %t
+; PR4185
+
+define void @test() {
+return:
+ call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06)
+ call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06)
+ ret void
+}
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index 2208b2d2e0e3..a48f0616291f 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,11 +1,11 @@
; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t
; RUN: grep inc %t | count 1
; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 8
-; RUN: grep addb %t | count 2
-; RUN: grep leaq %t | count 12
-; RUN: grep leal %t | count 2
-; RUN: grep movq %t | count 4
+; RUN: grep addq %t | count 13
+; RUN: not grep addb %t
+; RUN: grep leaq %t | count 8
+; RUN: grep leal %t | count 4
+; RUN: grep movq %t | count 5
; IV users in each of the loops from other loops shouldn't cause LSR
; to insert new induction variables. Previously it would create a
diff --git a/test/CodeGen/X86/optimize-smax.ll b/test/CodeGen/X86/optimize-max-0.ll
index 0c3be31e293f..90c14565e9a6 100644
--- a/test/CodeGen/X86/optimize-smax.ll
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 | not grep cmov
-; LSR should be able to eliminate the smax computations by
-; making the loops use slt comparisons instead of ne comparisons.
+; LSR should be able to eliminate the max computations by
+; making the loops use slt/ult comparisons instead of ne comparisons.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
@@ -231,6 +231,231 @@ return: ; preds = %bb20
ret void
}
+define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
+entry:
+ %0 = mul i32 %x, %w ; <i32> [#uses=2]
+ %1 = mul i32 %x, %w ; <i32> [#uses=1]
+ %2 = udiv i32 %1, 4 ; <i32> [#uses=1]
+ %.sum2 = add i32 %2, %0 ; <i32> [#uses=2]
+ %cond = icmp eq i32 %d, 1 ; <i1> [#uses=1]
+ br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader: ; preds = %entry
+ %3 = icmp ne i32 %x, 0 ; <i1> [#uses=1]
+ br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7: ; preds = %bb7.preheader
+ %4 = mul i32 %y.08, %w ; <i32> [#uses=1]
+ %5 = mul i32 %y.08, %s ; <i32> [#uses=1]
+ %6 = add i32 %5, 1 ; <i32> [#uses=1]
+ %tmp8 = icmp ugt i32 1, %w ; <i1> [#uses=1]
+ %smax9 = select i1 %tmp8, i32 1, i32 %w ; <i32> [#uses=1]
+ br label %bb6
+
+bb6: ; preds = %bb7, %bb.nph7
+ %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ] ; <i32> [#uses=3]
+ %7 = add i32 %x.06, %4 ; <i32> [#uses=1]
+ %8 = shl i32 %x.06, 1 ; <i32> [#uses=1]
+ %9 = add i32 %6, %8 ; <i32> [#uses=1]
+ %10 = getelementptr i8* %r, i32 %9 ; <i8*> [#uses=1]
+ %11 = load i8* %10, align 1 ; <i8> [#uses=1]
+ %12 = getelementptr i8* %j, i32 %7 ; <i8*> [#uses=1]
+ store i8 %11, i8* %12, align 1
+ br label %bb7
+
+bb7: ; preds = %bb6
+ %indvar.next7 = add i32 %x.06, 1 ; <i32> [#uses=2]
+ %exitcond10 = icmp ne i32 %indvar.next7, %smax9 ; <i1> [#uses=1]
+ br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge: ; preds = %bb7
+ br label %bb9
+
+bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge
+ br label %bb10
+
+bb10: ; preds = %bb9
+ %indvar.next11 = add i32 %y.08, 1 ; <i32> [#uses=2]
+ %exitcond12 = icmp ne i32 %indvar.next11, %x ; <i1> [#uses=1]
+ br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge: ; preds = %bb10
+ br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+ br label %bb18.loopexit
+
+bb.nph9: ; preds = %bb10.preheader
+ %13 = icmp ugt i32 %w, 0 ; <i1> [#uses=1]
+ br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split: ; preds = %bb.nph9
+ br label %bb7.preheader
+
+bb7.preheader: ; preds = %bb.nph9.split, %bb10
+ %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ] ; <i32> [#uses=3]
+ br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5: ; preds = %bb18.loopexit
+ %14 = udiv i32 %w, 2 ; <i32> [#uses=1]
+ %15 = icmp ult i32 %w, 2 ; <i1> [#uses=1]
+ %16 = udiv i32 %x, 2 ; <i32> [#uses=2]
+ br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split: ; preds = %bb.nph5
+ %tmp2 = icmp ugt i32 1, %16 ; <i1> [#uses=1]
+ %smax3 = select i1 %tmp2, i32 1, i32 %16 ; <i32> [#uses=1]
+ br label %bb13
+
+bb13: ; preds = %bb18, %bb.nph5.split
+ %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ] ; <i32> [#uses=4]
+ %17 = mul i32 %14, %y.14 ; <i32> [#uses=2]
+ %18 = shl i32 %y.14, 1 ; <i32> [#uses=1]
+ %19 = urem i32 %y.14, 2 ; <i32> [#uses=1]
+ %20 = add i32 %19, %18 ; <i32> [#uses=1]
+ %21 = mul i32 %20, %s ; <i32> [#uses=2]
+ br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3: ; preds = %bb13
+ %22 = add i32 %17, %0 ; <i32> [#uses=1]
+ %23 = add i32 %17, %.sum2 ; <i32> [#uses=1]
+ %24 = udiv i32 %w, 2 ; <i32> [#uses=2]
+ %tmp = icmp ugt i32 1, %24 ; <i1> [#uses=1]
+ %smax = select i1 %tmp, i32 1, i32 %24 ; <i32> [#uses=1]
+ br label %bb14
+
+bb14: ; preds = %bb15, %bb.nph3
+ %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ] ; <i32> [#uses=5]
+ %25 = shl i32 %x.12, 2 ; <i32> [#uses=1]
+ %26 = add i32 %25, %21 ; <i32> [#uses=1]
+ %27 = getelementptr i8* %r, i32 %26 ; <i8*> [#uses=1]
+ %28 = load i8* %27, align 1 ; <i8> [#uses=1]
+ %.sum = add i32 %22, %x.12 ; <i32> [#uses=1]
+ %29 = getelementptr i8* %j, i32 %.sum ; <i8*> [#uses=1]
+ store i8 %28, i8* %29, align 1
+ %30 = shl i32 %x.12, 2 ; <i32> [#uses=1]
+ %31 = or i32 %30, 2 ; <i32> [#uses=1]
+ %32 = add i32 %31, %21 ; <i32> [#uses=1]
+ %33 = getelementptr i8* %r, i32 %32 ; <i8*> [#uses=1]
+ %34 = load i8* %33, align 1 ; <i8> [#uses=1]
+ %.sum6 = add i32 %23, %x.12 ; <i32> [#uses=1]
+ %35 = getelementptr i8* %j, i32 %.sum6 ; <i8*> [#uses=1]
+ store i8 %34, i8* %35, align 1
+ br label %bb15
+
+bb15: ; preds = %bb14
+ %indvar.next = add i32 %x.12, 1 ; <i32> [#uses=2]
+ %exitcond = icmp ne i32 %indvar.next, %smax ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge: ; preds = %bb15
+ br label %bb17
+
+bb17: ; preds = %bb15.bb17_crit_edge, %bb13
+ br label %bb18
+
+bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+ %36 = icmp ult i32 %x, 2 ; <i1> [#uses=1]
+ br i1 %36, label %bb20, label %bb.nph5
+
+bb18: ; preds = %bb17
+ %indvar.next1 = add i32 %y.14, 1 ; <i32> [#uses=2]
+ %exitcond4 = icmp ne i32 %indvar.next1, %smax3 ; <i1> [#uses=1]
+ br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge: ; preds = %bb18
+ br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5
+ br label %bb20
+
+bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+ switch i32 %d, label %return [
+ i32 3, label %bb22
+ i32 1, label %bb29
+ ]
+
+bb22: ; preds = %bb20
+ %37 = mul i32 %x, %w ; <i32> [#uses=1]
+ %38 = udiv i32 %37, 4 ; <i32> [#uses=1]
+ %.sum3 = add i32 %38, %.sum2 ; <i32> [#uses=2]
+ %39 = add i32 %x, 15 ; <i32> [#uses=1]
+ %40 = and i32 %39, -16 ; <i32> [#uses=1]
+ %41 = add i32 %w, 15 ; <i32> [#uses=1]
+ %42 = and i32 %41, -16 ; <i32> [#uses=1]
+ %43 = mul i32 %40, %s ; <i32> [#uses=1]
+ %44 = icmp ugt i32 %x, 0 ; <i1> [#uses=1]
+ br i1 %44, label %bb.nph, label %bb26
+
+bb.nph: ; preds = %bb22
+ br label %bb23
+
+bb23: ; preds = %bb24, %bb.nph
+ %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ] ; <i32> [#uses=3]
+ %45 = mul i32 %y.21, %42 ; <i32> [#uses=1]
+ %.sum1 = add i32 %45, %43 ; <i32> [#uses=1]
+ %46 = getelementptr i8* %r, i32 %.sum1 ; <i8*> [#uses=1]
+ %47 = mul i32 %y.21, %w ; <i32> [#uses=1]
+ %.sum5 = add i32 %47, %.sum3 ; <i32> [#uses=1]
+ %48 = getelementptr i8* %j, i32 %.sum5 ; <i8*> [#uses=1]
+ tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
+ br label %bb24
+
+bb24: ; preds = %bb23
+ %indvar.next5 = add i32 %y.21, 1 ; <i32> [#uses=2]
+ %exitcond6 = icmp ne i32 %indvar.next5, %x ; <i1> [#uses=1]
+ br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge: ; preds = %bb24
+ br label %bb26
+
+bb26: ; preds = %bb24.bb26_crit_edge, %bb22
+ %49 = mul i32 %x, %w ; <i32> [#uses=1]
+ %.sum4 = add i32 %.sum3, %49 ; <i32> [#uses=1]
+ %50 = getelementptr i8* %j, i32 %.sum4 ; <i8*> [#uses=1]
+ %51 = mul i32 %x, %w ; <i32> [#uses=1]
+ %52 = udiv i32 %51, 2 ; <i32> [#uses=1]
+ tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
+ ret void
+
+bb29: ; preds = %bb20, %entry
+ %53 = add i32 %w, 15 ; <i32> [#uses=1]
+ %54 = and i32 %53, -16 ; <i32> [#uses=1]
+ %55 = icmp ugt i32 %x, 0 ; <i1> [#uses=1]
+ br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11: ; preds = %bb29
+ br label %bb30
+
+bb30: ; preds = %bb31, %bb.nph11
+ %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ] ; <i32> [#uses=3]
+ %56 = mul i32 %y.310, %54 ; <i32> [#uses=1]
+ %57 = getelementptr i8* %r, i32 %56 ; <i8*> [#uses=1]
+ %58 = mul i32 %y.310, %w ; <i32> [#uses=1]
+ %59 = getelementptr i8* %j, i32 %58 ; <i8*> [#uses=1]
+ tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
+ br label %bb31
+
+bb31: ; preds = %bb30
+ %indvar.next13 = add i32 %y.310, 1 ; <i32> [#uses=2]
+ %exitcond14 = icmp ne i32 %indvar.next13, %x ; <i1> [#uses=1]
+ br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge: ; preds = %bb31
+ br label %bb33
+
+bb33: ; preds = %bb31.bb33_crit_edge, %bb29
+ %60 = mul i32 %x, %w ; <i32> [#uses=1]
+ %61 = getelementptr i8* %j, i32 %60 ; <i8*> [#uses=1]
+ %62 = mul i32 %x, %w ; <i32> [#uses=1]
+ %63 = udiv i32 %62, 2 ; <i32> [#uses=1]
+ tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
+ ret void
+
+return: ; preds = %bb20
+ ret void
+}
+
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
diff --git a/test/CodeGen/X86/optimize-max-1.ll b/test/CodeGen/X86/optimize-max-1.ll
new file mode 100644
index 000000000000..084e1818f5dd
--- /dev/null
+++ b/test/CodeGen/X86/optimize-max-1.ll
@@ -0,0 +1,78 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep cmov
+
+; LSR should be able to eliminate both smax and umax expressions
+; in loop trip counts.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @fs(double* nocapture %p, i64 %n) nounwind {
+entry:
+ %tmp = icmp slt i64 %n, 1 ; <i1> [#uses=1]
+ %smax = select i1 %tmp, i64 1, i64 %n ; <i64> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2]
+ %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1]
+ store double 0.000000e+00, double* %scevgep, align 8
+ %0 = add i64 %i.0, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %0, %smax ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
+
+define void @bs(double* nocapture %p, i64 %n) nounwind {
+entry:
+ %tmp = icmp sge i64 %n, 1 ; <i1> [#uses=1]
+ %smax = select i1 %tmp, i64 %n, i64 1 ; <i64> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2]
+ %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1]
+ store double 0.000000e+00, double* %scevgep, align 8
+ %0 = add i64 %i.0, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %0, %smax ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
+
+define void @fu(double* nocapture %p, i64 %n) nounwind {
+entry:
+ %tmp = icmp eq i64 %n, 0 ; <i1> [#uses=1]
+ %umax = select i1 %tmp, i64 1, i64 %n ; <i64> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2]
+ %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1]
+ store double 0.000000e+00, double* %scevgep, align 8
+ %0 = add i64 %i.0, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %0, %umax ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
+
+define void @bu(double* nocapture %p, i64 %n) nounwind {
+entry:
+ %tmp = icmp ne i64 %n, 0 ; <i1> [#uses=1]
+ %umax = select i1 %tmp, i64 %n, i64 1 ; <i64> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2]
+ %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1]
+ store double 0.000000e+00, double* %scevgep, align 8
+ %0 = add i64 %i.0, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %0, %umax ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
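A sketch of the loop shape LSR is expected to produce for @fs above (an assumed illustration, not asserted by the test): because the body always executes at least once, the smax trip count collapses into a plain signed compare against %n, and the cmov disappears:

define void @fs_after(double* nocapture %p, i64 %n) nounwind {
entry:
  br label %bb

bb:
  %i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
  %scevgep = getelementptr double* %p, i64 %i
  store double 0.000000e+00, double* %scevgep, align 8
  %i.next = add i64 %i, 1
  %cont = icmp slt i64 %i.next, %n ; slt against %n instead of eq against %smax
  br i1 %cont, label %bb, label %return

return:
  ret void
}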
diff --git a/test/CodeGen/X86/optimize-max-2.ll b/test/CodeGen/X86/optimize-max-2.ll
new file mode 100644
index 000000000000..effc3fc737d9
--- /dev/null
+++ b/test/CodeGen/X86/optimize-max-2.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep cmov %t | count 2
+; RUN: grep jne %t | count 1
+
+; LSR's OptimizeMax function shouldn't try to eliminate this max, because
+; it has three operands.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @foo(double* nocapture %p, i64 %x, i64 %y) nounwind {
+entry:
+ %tmp = icmp eq i64 %y, 0 ; <i1> [#uses=1]
+ %umax = select i1 %tmp, i64 1, i64 %y ; <i64> [#uses=2]
+ %tmp8 = icmp ugt i64 %umax, %x ; <i1> [#uses=1]
+ %umax9 = select i1 %tmp8, i64 %umax, i64 %x ; <i64> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb4, %entry
+ %i.07 = phi i64 [ 0, %entry ], [ %2, %bb4 ] ; <i64> [#uses=2]
+ %scevgep = getelementptr double* %p, i64 %i.07 ; <double*> [#uses=2]
+ %0 = load double* %scevgep, align 8 ; <double> [#uses=1]
+ %1 = fmul double %0, 2.000000e+00 ; <double> [#uses=1]
+ store double %1, double* %scevgep, align 8
+ %2 = add i64 %i.07, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %2, %umax9 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb4
+
+return: ; preds = %bb4
+ ret void
+}
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 1731feb58e91..229865bb71b9 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,6 +1,8 @@
; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
; RUN: llvm-as < %s | llc -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
+; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | not grep 'lJTI'
+; rdar://6971437
declare void @_Z3bari(i32)
diff --git a/test/CodeGen/X86/tls1-pic.ll b/test/CodeGen/X86/tls1-pic.ll
index a73e75ba8068..e43bf7ce66e6 100644
--- a/test/CodeGen/X86/tls1-pic.ll
+++ b/test/CodeGen/X86/tls1-pic.ll
@@ -1,5 +1,5 @@
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic > %t
-; RUN: grep {leal i@TLSGD(,%ebx,1), %eax} %t
+; RUN: grep {leal i@TLSGD(,%ebx), %eax} %t
; RUN: grep {call ___tls_get_addr@PLT} %t
; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic > %t2
; RUN: grep {leaq i@TLSGD(%rip), %rdi} %t2
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
index 5155dfd4a118..85ff360a5508 100644
--- a/test/CodeGen/X86/tls1.ll
+++ b/test/CodeGen/X86/tls1.ll
@@ -5,7 +5,7 @@
@i = thread_local global i32 15
-define i32 @f() {
+define i32 @f() nounwind {
entry:
%tmp1 = load i32* @i
ret i32 %tmp1
diff --git a/test/CodeGen/X86/tls2-pic.ll b/test/CodeGen/X86/tls2-pic.ll
index cdfe97ca0b04..6ab3ee0a69fd 100644
--- a/test/CodeGen/X86/tls2-pic.ll
+++ b/test/CodeGen/X86/tls2-pic.ll
@@ -1,5 +1,5 @@
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic > %t
-; RUN: grep {leal i@TLSGD(,%ebx,1), %eax} %t
+; RUN: grep {leal i@TLSGD(,%ebx), %eax} %t
; RUN: grep {call ___tls_get_addr@PLT} %t
; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic > %t2
; RUN: grep {leaq i@TLSGD(%rip), %rdi} %t2
@@ -7,7 +7,7 @@
@i = thread_local global i32 15
-define i32* @f() {
+define i32* @f() nounwind {
entry:
ret i32* @i
}
diff --git a/test/CodeGen/X86/tls3-pic.ll b/test/CodeGen/X86/tls3-pic.ll
index f62cca218ab0..8e6df29afbf9 100644
--- a/test/CodeGen/X86/tls3-pic.ll
+++ b/test/CodeGen/X86/tls3-pic.ll
@@ -1,5 +1,5 @@
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic > %t
-; RUN: grep {leal i@TLSGD(,%ebx,1), %eax} %t
+; RUN: grep {leal i@TLSGD(,%ebx), %eax} %t
; RUN: grep {call ___tls_get_addr@PLT} %t
; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic > %t2
; RUN: grep {leaq i@TLSGD(%rip), %rdi} %t2
diff --git a/test/CodeGen/X86/tls4-pic.ll b/test/CodeGen/X86/tls4-pic.ll
index ec3d43591cd3..94de78f7aef7 100644
--- a/test/CodeGen/X86/tls4-pic.ll
+++ b/test/CodeGen/X86/tls4-pic.ll
@@ -1,5 +1,5 @@
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic > %t
-; RUN: grep {leal i@TLSGD(,%ebx,1), %eax} %t
+; RUN: grep {leal i@TLSGD(,%ebx), %eax} %t
; RUN: grep {call ___tls_get_addr@PLT} %t
; RUN: llvm-as < %s | llc -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic > %t2
; RUN: grep {leaq i@TLSGD(%rip), %rdi} %t2
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
new file mode 100644
index 000000000000..9e69154f10f9
--- /dev/null
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep "\\\\\\\<mul"
+
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+define i1 @a(i32 %x) zeroext nounwind {
+ %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
+ %obil = extractvalue {i32, i1} %res, 1
+ ret i1 %obil
+}
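Both fields of the intrinsic's {i32, i1} result are available to callers; a minimal usage sketch (hypothetical helper, reusing the declare above; not part of the test) that saturates to -1 when the product wraps:

define i32 @umul_saturating(i32 %a, i32 %b) nounwind {
  %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  %prod = extractvalue {i32, i1} %res, 0 ; low 32 bits of the product
  %ovf = extractvalue {i32, i1} %res, 1  ; true iff the multiply overflowed
  %sat = select i1 %ovf, i32 -1, i32 %prod
  ret i32 %sat
}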
diff --git a/test/DebugInfo/2009-06-15-InlineFuncStart.ll b/test/DebugInfo/2009-06-15-InlineFuncStart.ll
new file mode 100644
index 000000000000..2ece6a3bf8ce
--- /dev/null
+++ b/test/DebugInfo/2009-06-15-InlineFuncStart.ll
@@ -0,0 +1,77 @@
+; Test inlined function handling. This test case is copied from
+; 2009-06-12-InlineFunctStart.ll with one change: in function main, block bb1
+; does not have an llvm.dbg.stoppoint intrinsic before llvm.dbg.func.start.
+; RUN: llvm-as < %s | llc
+; RUN: llvm-as < %s | llc -O0
+ %llvm.dbg.anchor.type = type { i32, i32 }
+ %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
+ %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
+ %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
+@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
+@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
+@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
+@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5627) (LLVM build)\00", section "llvm.metadata" ; <[52 x i8]*> [#uses=1]
+@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
+@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
+@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
+@llvm.dbg.subprogram5 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@.str6 = internal constant [5 x i8] c"main\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
+
+define i32 @foo() nounwind alwaysinline {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
+ call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ store i32 42, i32* %0, align 4
+ %1 = load i32* %0, align 4 ; <i32> [#uses=1]
+ store i32 %1, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load i32* %retval ; <i32> [#uses=1]
+ call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
+ ret i32 %retval1
+}
+
+declare void @llvm.dbg.func.start({ }*) nounwind
+
+declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
+
+declare void @llvm.dbg.region.end({ }*) nounwind
+
+define i32 @main() nounwind {
+entry:
+ %retval.i = alloca i32 ; <i32*> [#uses=2]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %1 = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
+ br label %bb1
+
+return: ; preds = %entry
+ %retval1 = load i32* %retval ; <i32> [#uses=1]
+ call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
+ ret i32 %retval1
+
+bb1:
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
+ call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
+ store i32 42, i32* %0, align 4
+ %2 = load i32* %0, align 4 ; <i32> [#uses=1]
+ store i32 %2, i32* %retval.i, align 4
+ %retval1.i = load i32* %retval.i ; <i32> [#uses=1]
+ call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
+ store i32 %retval1.i, i32* %1, align 4
+ %3 = load i32* %1, align 4 ; <i32> [#uses=1]
+ store i32 %3, i32* %retval, align 4
+ br label %return
+}
diff --git a/test/DebugInfo/2009-06-15-abstract_origin.ll b/test/DebugInfo/2009-06-15-abstract_origin.ll
new file mode 100644
index 000000000000..3029da02a266
--- /dev/null
+++ b/test/DebugInfo/2009-06-15-abstract_origin.ll
@@ -0,0 +1,275 @@
+; RUN: llvm-as < %s | llc -asm-verbose -O0 | not grep ".long 0x0 ## DW_AT_abstract_origin"
+
+ %llvm.dbg.anchor.type = type { i32, i32 }
+ %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
+ %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
+ %llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }*, i32 }
+ %llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
+ %llvm.dbg.global_variable.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1, { }* }
+ %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
+ %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
+ %struct.AAAAAImageParser = type { %struct.CObject* }
+ %struct.CObject = type { i32 }
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str = internal constant [9 x i8] c"tcase.cc\00", section "llvm.metadata" ; <[9 x i8]*> [#uses=1]
+@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
+@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5646) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
+@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([9 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
+@.str3 = internal constant [8 x i8] c"tcase.h\00", section "llvm.metadata" ; <[8 x i8]*> [#uses=1]
+@llvm.dbg.compile_unit4 = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([8 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
+@.str5 = internal constant [8 x i8] c"CObject\00", section "llvm.metadata" ; <[8 x i8]*> [#uses=1]
+@.str6 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
+@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
+@.str7 = internal constant [2 x i8] c"d\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
+@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str7, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 6, i64 32, i64 32, i64 0, i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.derivedtype8 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.array = internal constant [3 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype8 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
+@llvm.dbg.composite9 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str10 = internal constant [4 x i8] c"set\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
+@.str11 = internal constant [18 x i8] c"_ZN7CObject3setEi\00", section "llvm.metadata" ; <[18 x i8]*> [#uses=1]
+@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str10, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str10, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str11, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 3, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite9 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.array12 = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype8 to { }*)], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
+@llvm.dbg.composite13 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array12 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@.str14 = internal constant [8 x i8] c"release\00", section "llvm.metadata" ; <[8 x i8]*> [#uses=1]
+@.str15 = internal constant [22 x i8] c"_ZN7CObject7releaseEv\00", section "llvm.metadata" ; <[22 x i8]*> [#uses=1]
+@llvm.dbg.subprogram16 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([8 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([22 x i8]* @.str15, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite13 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.array17 = internal constant [3 x { }*] [{ }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to { }*)], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
+@llvm.dbg.composite18 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 1, i64 32, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array17 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@llvm.dbg.derivedtype19 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.array20 = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*)], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
+@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array20 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@llvm.dbg.subprogram21 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([8 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([22 x i8]* @.str15, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.derivedtype22 = internal constant %llvm.dbg.derivedtype.type { i32 458790, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@.str23 = internal constant [5 x i8] c"this\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
+@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram21 to { }*), i8* getelementptr ([5 x i8]* @.str23, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 4, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype22 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
+@llvm.dbg.array24 = internal constant [2 x { }*] [{ }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*)], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
+@llvm.dbg.composite25 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array24 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@.str26 = internal constant [14 x i8] c"ReleaseObject\00", section "llvm.metadata" ; <[14 x i8]*> [#uses=1]
+@.str27 = internal constant [27 x i8] c"_Z13ReleaseObjectP7CObject\00", section "llvm.metadata" ; <[27 x i8]*> [#uses=1]
+@llvm.dbg.subprogram28 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([14 x i8]* @.str26, i32 0, i32 0), i8* getelementptr ([14 x i8]* @.str26, i32 0, i32 0), i8* getelementptr ([27 x i8]* @.str27, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 10, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite25 to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@.str29 = internal constant [7 x i8] c"object\00", section "llvm.metadata" ; <[7 x i8]*> [#uses=1]
+@llvm.dbg.variable30 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*), i8* getelementptr ([7 x i8]* @.str29, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 10, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype22 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
+@.str31 = internal constant [17 x i8] c"AAAAAImageParser\00", section "llvm.metadata" ; <[17 x i8]*> [#uses=1]
+@.str33 = internal constant [13 x i8] c"mCustomWhite\00", section "llvm.metadata" ; <[13 x i8]*> [#uses=1]
+@llvm.dbg.derivedtype34 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([13 x i8]* @.str33, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 21, i64 32, i64 32, i64 0, i32 1, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.derivedtype35 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite45 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.array36 = internal constant [3 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype35 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
+@llvm.dbg.composite37 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array36 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@.str38 = internal constant [18 x i8] c"~AAAAAImageParser\00", section "llvm.metadata" ; <[18 x i8]*> [#uses=1]
+@llvm.dbg.subprogram39 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite37 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.array40 = internal constant [3 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype35 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*)], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
+@llvm.dbg.composite41 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array40 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@.str42 = internal constant [36 x i8] c"_ZN16AAAAAImageParser3setEP7CObject\00", section "llvm.metadata" ; <[36 x i8]*> [#uses=1]
+@llvm.dbg.subprogram43 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str10, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str10, i32 0, i32 0), i8* getelementptr ([36 x i8]* @.str42, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 19, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite41 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.array44 = internal constant [3 x { }*] [{ }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype34 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram39 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram43 to { }*)], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
+@llvm.dbg.composite45 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([17 x i8]* @.str31, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 16, i64 32, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array44 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@llvm.dbg.derivedtype46 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite45 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.array47 = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype46 to { }*)], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
+@llvm.dbg.composite48 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array47 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@.str49 = internal constant [26 x i8] c"_ZN16AAAAAImageParserD2Ev\00", section "llvm.metadata" ; <[26 x i8]*> [#uses=1]
+@llvm.dbg.subprogram50 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([26 x i8]* @.str49, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite48 to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.derivedtype51 = internal constant %llvm.dbg.derivedtype.type { i32 458790, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype46 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.variable52 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*), i8* getelementptr ([5 x i8]* @.str23, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype51 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
+@.str53 = internal constant [26 x i8] c"_ZN16AAAAAImageParserD1Ev\00", section "llvm.metadata" ; <[26 x i8]*> [#uses=1]
+@llvm.dbg.subprogram54 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([26 x i8]* @.str53, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite48 to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.variable55 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram54 to { }*), i8* getelementptr ([5 x i8]* @.str23, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype51 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
+@llvm.dbg.array56 = internal constant [1 x { }*] [{ }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata" ; <[1 x { }*]*> [#uses=1]
+@llvm.dbg.composite57 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([1 x { }*]* @llvm.dbg.array56 to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
+@.str58 = internal constant [5 x i8] c"main\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
+@llvm.dbg.subprogram59 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str58, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str58, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str58, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite57 to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
+@.str60 = internal constant [2 x i8] c"C\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
+@llvm.dbg.variable61 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram59 to { }*), i8* getelementptr ([2 x i8]* @.str60, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype46 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
+@_ZZ4mainE3C.0 = private constant %struct.AAAAAImageParser zeroinitializer ; <%struct.AAAAAImageParser*> [#uses=2]
+@llvm.dbg.global_variables = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 52 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str62 = internal constant [14 x i8] c"_ZZ4mainE3C.0\00", section "llvm.metadata" ; <[14 x i8]*> [#uses=1]
+@.str63 = internal constant [4 x i8] c"C.0\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
+@llvm.dbg.global_variable = internal constant %llvm.dbg.global_variable.type { i32 458804, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.global_variables to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([14 x i8]* @.str62, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str63, i32 0, i32 0), i8* getelementptr ([14 x i8]* @.str62, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite45 to { }*), i1 false, i1 true, { }* bitcast (%struct.AAAAAImageParser* @_ZZ4mainE3C.0 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.global_variable.type*> [#uses=0]
+
+define void @_ZN16AAAAAImageParserD2Ev(%struct.AAAAAImageParser* %this) nounwind ssp {
+entry:
+ %object_addr.i = alloca %struct.CObject* ; <%struct.CObject**> [#uses=4]
+ %retval.i = alloca %struct.CObject* ; <%struct.CObject**> [#uses=2]
+ %0 = alloca %struct.CObject* ; <%struct.CObject**> [#uses=2]
+ %this_addr = alloca %struct.AAAAAImageParser* ; <%struct.AAAAAImageParser**> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*))
+ %1 = bitcast %struct.AAAAAImageParser** %this_addr to { }* ; <{ }*> [#uses=1]
+ call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable52 to { }*))
+ store %struct.AAAAAImageParser* %this, %struct.AAAAAImageParser** %this_addr
+ call void @llvm.dbg.stoppoint(i32 26, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ %2 = load %struct.AAAAAImageParser** %this_addr, align 4 ; <%struct.AAAAAImageParser*> [#uses=1]
+ %3 = getelementptr %struct.AAAAAImageParser* %2, i32 0, i32 0 ; <%struct.CObject**> [#uses=1]
+ %4 = load %struct.CObject** %3, align 4 ; <%struct.CObject*> [#uses=1]
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*)) nounwind
+ %5 = bitcast %struct.CObject** %object_addr.i to { }* ; <{ }*> [#uses=1]
+ call void @llvm.dbg.declare({ }* %5, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable30 to { }*)) nounwind
+ store %struct.CObject* %4, %struct.CObject** %object_addr.i
+ call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
+ %6 = load %struct.CObject** %object_addr.i, align 4 ; <%struct.CObject*> [#uses=1]
+ %7 = icmp ne %struct.CObject* %6, null ; <i1> [#uses=1]
+ br i1 %7, label %bb.i, label %_Z13ReleaseObjectP7CObject.exit
+
+bb.i: ; preds = %entry
+ call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
+ %8 = load %struct.CObject** %object_addr.i, align 4 ; <%struct.CObject*> [#uses=1]
+ call void @_ZN7CObject7releaseEv(%struct.CObject* %8) nounwind
+ br label %_Z13ReleaseObjectP7CObject.exit
+
+_Z13ReleaseObjectP7CObject.exit: ; preds = %bb.i, %entry
+ call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
+ store %struct.CObject* null, %struct.CObject** %0, align 4
+ %9 = load %struct.CObject** %0, align 4 ; <%struct.CObject*> [#uses=1]
+ store %struct.CObject* %9, %struct.CObject** %retval.i, align 4
+ %retval2.i = load %struct.CObject** %retval.i ; <%struct.CObject*> [#uses=0]
+ call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
+ call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*))
+ br label %bb
+
+bb: ; preds = %_Z13ReleaseObjectP7CObject.exit
+ call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ br label %return
+
+return: ; preds = %bb
+ call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*))
+ ret void
+}
+
+define linkonce_odr void @_ZN7CObject7releaseEv(%struct.CObject* %this) nounwind ssp {
+entry:
+ %this_addr = alloca %struct.CObject* ; <%struct.CObject**> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram21 to { }*))
+ %0 = bitcast %struct.CObject** %this_addr to { }* ; <{ }*> [#uses=1]
+ call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*))
+ store %struct.CObject* %this, %struct.CObject** %this_addr
+ call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ %1 = load %struct.CObject** %this_addr, align 4 ; <%struct.CObject*> [#uses=1]
+ %2 = getelementptr %struct.CObject* %1, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 0, i32* %2, align 4
+ br label %return
+
+return: ; preds = %entry
+ call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram21 to { }*))
+ ret void
+}
+
+declare void @llvm.dbg.func.start({ }*) nounwind readnone
+
+declare void @llvm.dbg.declare({ }*, { }*) nounwind readnone
+
+declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind readnone
+
+declare void @llvm.dbg.region.end({ }*) nounwind readnone
+
+define void @_ZN16AAAAAImageParserD1Ev(%struct.AAAAAImageParser* %this) nounwind ssp {
+entry:
+ %object_addr.i = alloca %struct.CObject* ; <%struct.CObject**> [#uses=4]
+ %retval.i = alloca %struct.CObject* ; <%struct.CObject**> [#uses=2]
+ %0 = alloca %struct.CObject* ; <%struct.CObject**> [#uses=2]
+ %this_addr = alloca %struct.AAAAAImageParser* ; <%struct.AAAAAImageParser**> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram54 to { }*))
+ %1 = bitcast %struct.AAAAAImageParser** %this_addr to { }* ; <{ }*> [#uses=1]
+ call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable55 to { }*))
+ store %struct.AAAAAImageParser* %this, %struct.AAAAAImageParser** %this_addr
+ call void @llvm.dbg.stoppoint(i32 26, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ %2 = load %struct.AAAAAImageParser** %this_addr, align 4 ; <%struct.AAAAAImageParser*> [#uses=1]
+ %3 = getelementptr %struct.AAAAAImageParser* %2, i32 0, i32 0 ; <%struct.CObject**> [#uses=1]
+ %4 = load %struct.CObject** %3, align 4 ; <%struct.CObject*> [#uses=1]
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*)) nounwind
+ %5 = bitcast %struct.CObject** %object_addr.i to { }* ; <{ }*> [#uses=1]
+ call void @llvm.dbg.declare({ }* %5, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable30 to { }*)) nounwind
+ store %struct.CObject* %4, %struct.CObject** %object_addr.i
+ call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
+ %6 = load %struct.CObject** %object_addr.i, align 4 ; <%struct.CObject*> [#uses=1]
+ %7 = icmp ne %struct.CObject* %6, null ; <i1> [#uses=1]
+ br i1 %7, label %bb.i, label %_Z13ReleaseObjectP7CObject.exit
+
+bb.i: ; preds = %entry
+ call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
+ %8 = load %struct.CObject** %object_addr.i, align 4 ; <%struct.CObject*> [#uses=1]
+ call void @_ZN7CObject7releaseEv(%struct.CObject* %8) nounwind
+ br label %_Z13ReleaseObjectP7CObject.exit
+
+_Z13ReleaseObjectP7CObject.exit: ; preds = %bb.i, %entry
+ call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
+ store %struct.CObject* null, %struct.CObject** %0, align 4
+ %9 = load %struct.CObject** %0, align 4 ; <%struct.CObject*> [#uses=1]
+ store %struct.CObject* %9, %struct.CObject** %retval.i, align 4
+ %retval2.i = load %struct.CObject** %retval.i ; <%struct.CObject*> [#uses=0]
+ call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
+ call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*))
+ br label %bb
+
+bb: ; preds = %_Z13ReleaseObjectP7CObject.exit
+ call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ br label %return
+
+return: ; preds = %bb
+ call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram54 to { }*))
+ ret void
+}
+
+define i32 @main() ssp {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %C = alloca %struct.AAAAAImageParser* ; <%struct.AAAAAImageParser**> [#uses=3]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %C.1 = alloca %struct.AAAAAImageParser* ; <%struct.AAAAAImageParser**> [#uses=4]
+ %1 = alloca %struct.AAAAAImageParser* ; <%struct.AAAAAImageParser**> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram59 to { }*))
+ %2 = bitcast %struct.AAAAAImageParser** %C to { }* ; <{ }*> [#uses=1]
+ call void @llvm.dbg.declare({ }* %2, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable61 to { }*))
+ call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ %3 = call i8* @_Znwm(i32 4) ; <i8*> [#uses=1]
+ %4 = bitcast i8* %3 to %struct.AAAAAImageParser* ; <%struct.AAAAAImageParser*> [#uses=1]
+ store %struct.AAAAAImageParser* %4, %struct.AAAAAImageParser** %1, align 4
+ %5 = load %struct.AAAAAImageParser** %1, align 4 ; <%struct.AAAAAImageParser*> [#uses=1]
+ %6 = getelementptr %struct.AAAAAImageParser* %5, i32 0, i32 0 ; <%struct.CObject**> [#uses=1]
+ %7 = load %struct.CObject** getelementptr (%struct.AAAAAImageParser* @_ZZ4mainE3C.0, i32 0, i32 0), align 4 ; <%struct.CObject*> [#uses=1]
+ store %struct.CObject* %7, %struct.CObject** %6, align 4
+ %8 = load %struct.AAAAAImageParser** %1, align 4 ; <%struct.AAAAAImageParser*> [#uses=1]
+ store %struct.AAAAAImageParser* %8, %struct.AAAAAImageParser** %C, align 4
+ call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ %9 = load %struct.AAAAAImageParser** %C, align 4 ; <%struct.AAAAAImageParser*> [#uses=1]
+ store %struct.AAAAAImageParser* %9, %struct.AAAAAImageParser** %C.1, align 4
+ %10 = load %struct.AAAAAImageParser** %C.1, align 4 ; <%struct.AAAAAImageParser*> [#uses=1]
+ %11 = icmp ne %struct.AAAAAImageParser* %10, null ; <i1> [#uses=1]
+ br i1 %11, label %bb, label %bb1
+ ; No predecessors!
+ call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ br label %bb
+
+bb: ; preds = %12, %entry
+ call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ %13 = load %struct.AAAAAImageParser** %C.1, align 4 ; <%struct.AAAAAImageParser*> [#uses=1]
+ call void @_ZN16AAAAAImageParserD1Ev(%struct.AAAAAImageParser* %13) nounwind
+ %14 = load %struct.AAAAAImageParser** %C.1, align 4 ; <%struct.AAAAAImageParser*> [#uses=1]
+ %15 = bitcast %struct.AAAAAImageParser* %14 to i8* ; <i8*> [#uses=1]
+ call void @_ZdlPv(i8* %15) nounwind
+ br label %bb1
+
+bb1: ; preds = %bb, %entry
+ call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ store i32 0, i32* %0, align 4
+ %16 = load i32* %0, align 4 ; <i32> [#uses=1]
+ store i32 %16, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %bb1
+ %retval2 = load i32* %retval ; <i32> [#uses=1]
+ call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+ call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram59 to { }*))
+ ret i32 %retval2
+}
+
+declare i8* @_Znwm(i32)
+
+declare void @_ZdlPv(i8*) nounwind
diff --git a/test/Feature/vector-cast-constant-exprs.ll b/test/Feature/vector-cast-constant-exprs.ll
new file mode 100644
index 000000000000..ffdc0f080fa6
--- /dev/null
+++ b/test/Feature/vector-cast-constant-exprs.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | llvm-dis | not grep {ret.*(}
+
+; All of these constant expressions should fold.
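+; For example, @gd's 'zext (<2 x i32><i32 3, i32 4> to <2 x i64>)'
+; should fold to the literal <2 x i64><i64 3, i64 4>, so no
+; parenthesized cast expression survives the round-trip (hence the
+; 'not grep' above).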
+
+define <2 x float> @ga() {
+ ret <2 x float> fptrunc (<2 x double><double 4.3, double 3.2> to <2 x float>)
+}
+define <2 x double> @gb() {
+ ret <2 x double> fpext (<2 x float><float 2.0, float 8.0> to <2 x double>)
+}
+define <2 x i64> @gd() {
+ ret <2 x i64> zext (<2 x i32><i32 3, i32 4> to <2 x i64>)
+}
+define <2 x i64> @ge() {
+ ret <2 x i64> sext (<2 x i32><i32 3, i32 4> to <2 x i64>)
+}
+define <2 x i32> @gf() {
+ ret <2 x i32> trunc (<2 x i64><i64 3, i64 4> to <2 x i32>)
+}
+define <2 x i32> @gh() {
+ ret <2 x i32> fptoui (<2 x float><float 8.0, float 7.0> to <2 x i32>)
+}
+define <2 x i32> @gi() {
+ ret <2 x i32> fptosi (<2 x float><float 8.0, float 7.0> to <2 x i32>)
+}
+define <2 x float> @gj() {
+ ret <2 x float> uitofp (<2 x i32><i32 8, i32 7> to <2 x float>)
+}
+define <2 x float> @gk() {
+ ret <2 x float> sitofp (<2 x i32><i32 8, i32 7> to <2 x float>)
+}
+define <2 x double> @gl() {
+ ret <2 x double> bitcast (<2 x double><double 4.0, double 3.0> to <2 x double>)
+}
+define <2 x double> @gm() {
+ ret <2 x double> bitcast (<2 x i64><i64 4, i64 3> to <2 x double>)
+}
diff --git a/test/FrontendC++/2009-06-16-DebugInfoCrash.cpp b/test/FrontendC++/2009-06-16-DebugInfoCrash.cpp
new file mode 100644
index 000000000000..b3758d2782ca
--- /dev/null
+++ b/test/FrontendC++/2009-06-16-DebugInfoCrash.cpp
@@ -0,0 +1,10 @@
+// RUN: %llvmgxx -c -emit-llvm %s -o /dev/null -g
+// This crashes if we try to emit debug info for TEMPLATE_DECL members.
+template <class T> class K2PtrVectorBase {};
+template <class T> class K2Vector {};
+template <class U > class K2Vector<U*> : public K2PtrVectorBase<U*> {};
+class ScriptInfoManager {
+ void PostRegister() ;
+ template <class SI> short ReplaceExistingElement(K2Vector<SI*>& v);
+};
+void ScriptInfoManager::PostRegister() {}
diff --git a/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp b/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
new file mode 100644
index 000000000000..eef0e86e2ae5
--- /dev/null
+++ b/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
@@ -0,0 +1,32 @@
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep baz | grep global | grep {struct.bar}
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep ccc | grep global | grep {struct.CC}
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep quux | grep global | grep {struct.bar}
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep foo | grep global | grep {struct.SRCFilter::FilterEntry}
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.bar} | grep {1 x i32}
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.CC} | grep {struct.bar}
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.bar} | grep {1 x i32}
+// RUN: %llvmgxx -S -m32 -emit-llvm %s -o - | grep {struct.SRCFilter::FilterEntry} | not grep {1 x i32}
+// XFAIL: *
+// XTARGET: powerpc-apple-darwin
+
+template<class _T1, class _T2> struct payre {
+ _T1 first;
+ _T2 second;
+ payre() : first(), second() { }
+};
+struct KBFP {
+ double mCutoffFrequency;
+};
+class SRCFilter {
+ struct FilterEntry: public payre<KBFP, float*>{};
+ static FilterEntry foo;
+};
+SRCFilter::FilterEntry SRCFilter::foo; // 12 bytes
+payre<KBFP, float*> baz; // 16 bytes
+class CC { // 16 bytes
+ public: payre<KBFP, float*> x;
+};
+class CC ccc;
+
+struct bar { KBFP x; float* y;}; // 16 bytes
+struct bar quux;
diff --git a/test/FrontendC/2009-06-14-HighlyAligned.c b/test/FrontendC/2009-06-14-HighlyAligned.c
new file mode 100644
index 000000000000..4678b75b6daf
--- /dev/null
+++ b/test/FrontendC/2009-06-14-HighlyAligned.c
@@ -0,0 +1,8 @@
+// RUN: %llvmgcc %s -S
+// PR4332
+
+static int highly_aligned __attribute__((aligned(4096)));
+
+int f() {
+ return highly_aligned;
+}
diff --git a/test/FrontendC/2009-06-18-StaticInitTailPadPack.c b/test/FrontendC/2009-06-18-StaticInitTailPadPack.c
new file mode 100644
index 000000000000..17f35c04a9ec
--- /dev/null
+++ b/test/FrontendC/2009-06-18-StaticInitTailPadPack.c
@@ -0,0 +1,26 @@
+// RUN: %llvmgcc %s -S -o -
+// rdar://6983634
+
+ typedef struct A *Foo;
+#pragma pack(push, 2)
+ struct Bar {
+ Foo f1;
+ unsigned short f2;
+ float f3;
+ };
+ struct Baz {
+ struct Bar f1;
+ struct Bar f2;
+ };
+ struct Qux {
+ unsigned long f1;
+ struct Baz f2;
+ };
+extern const struct Qux Bork;
+const struct Qux Bork = {
+ 0,
+ {
+ {0},
+ {0}
+ }
+};
diff --git a/test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll b/test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll
new file mode 100644
index 000000000000..332260590ae1
--- /dev/null
+++ b/test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llvm-dis | not grep ptrtoint
+; PR4424
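+; A zero left-hand side folds div/rem/shift constant expressions to 0
+; even though the ptrtoint RHS is not itself foldable, so no ptrtoint
+; should survive in these initializers.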
+@G = external global i32
+@test1 = constant i32 sdiv (i32 0, i32 ptrtoint (i32* @G to i32))
+@test2 = constant i32 udiv (i32 0, i32 ptrtoint (i32* @G to i32))
+@test3 = constant i32 srem (i32 0, i32 ptrtoint (i32* @G to i32))
+@test4 = constant i32 urem (i32 0, i32 ptrtoint (i32* @G to i32))
+@test5 = constant i32 lshr (i32 0, i32 ptrtoint (i32* @G to i32))
+@test6 = constant i32 ashr (i32 0, i32 ptrtoint (i32* @G to i32))
+@test7 = constant i32 shl (i32 0, i32 ptrtoint (i32* @G to i32))
+
diff --git a/test/Transforms/ConstProp/div-zero.ll b/test/Transforms/ConstProp/div-zero.ll
new file mode 100644
index 000000000000..166c643dcc99
--- /dev/null
+++ b/test/Transforms/ConstProp/div-zero.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 0}
+; PR4424
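+; %zero = sub %ptr, %ptr folds to 0, and 'sdiv 0, C' folds to 0 for
+; the non-zero constant divisor, leaving just 'ret i32 0'.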
+declare void @ext()
+
+define i32 @foo(i32 %ptr) {
+entry:
+ %zero = sub i32 %ptr, %ptr ; <i32> [#uses=1]
+ %div_zero = sdiv i32 %zero, ptrtoint (i32* getelementptr (i32* null, i32 1) to i32) ; <i32> [#uses=1]
+ ret i32 %div_zero
+}
+
diff --git a/test/Transforms/GVN/2009-06-17-InvalidPRE.ll b/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
new file mode 100644
index 000000000000..c8982c86cb9e
--- /dev/null
+++ b/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
@@ -0,0 +1,72 @@
+; RUN: llvm-as < %s | opt -gvn -enable-load-pre | llvm-dis | not grep pre1
+; GVN load PRE was hoisting the loads at %13 and %16 up to bb4.outer.
+; This is invalid because it bypasses the %m.0.ph == null check in bb4.
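+; Roughly, the C idiom under test walks an mbuf chain:
+;   while (len > 0 && m != NULL) { ... use m->m_len ... }
+; and hoisting the m->m_len load above the m != NULL test would read
+; through a possibly-null pointer (a sketch, not the exact source).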
+; ModuleID = 'mbuf.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+ %struct.mbuf = type { %struct.mbuf*, %struct.mbuf*, i32, i8*, i16, i16, i32 }
+
+define void @m_adj(%struct.mbuf* %mp, i32 %req_len) nounwind optsize {
+entry:
+ %0 = icmp eq %struct.mbuf* %mp, null ; <i1> [#uses=1]
+ %1 = icmp slt i32 %req_len, 0 ; <i1> [#uses=1]
+ %or.cond = or i1 %1, %0 ; <i1> [#uses=1]
+ br i1 %or.cond, label %return, label %bb4.preheader
+
+bb4.preheader: ; preds = %entry
+ br label %bb4.outer
+
+bb2: ; preds = %bb1
+ %2 = sub i32 %len.0, %13 ; <i32> [#uses=1]
+ %3 = getelementptr %struct.mbuf* %m.0.ph, i32 0, i32 2 ; <i32*> [#uses=1]
+ store i32 0, i32* %3, align 4
+ %4 = getelementptr %struct.mbuf* %m.0.ph, i32 0, i32 0 ; <%struct.mbuf**> [#uses=1]
+ %5 = load %struct.mbuf** %4, align 4 ; <%struct.mbuf*> [#uses=1]
+ br label %bb4.outer
+
+bb4.outer: ; preds = %bb4.preheader, %bb2
+ %m.0.ph = phi %struct.mbuf* [ %5, %bb2 ], [ %mp, %bb4.preheader ] ; <%struct.mbuf*> [#uses=7]
+ %len.0.ph = phi i32 [ %2, %bb2 ], [ %req_len, %bb4.preheader ] ; <i32> [#uses=1]
+ %6 = icmp ne %struct.mbuf* %m.0.ph, null ; <i1> [#uses=1]
+ %7 = getelementptr %struct.mbuf* %m.0.ph, i32 0, i32 2 ; <i32*> [#uses=1]
+ %8 = getelementptr %struct.mbuf* %m.0.ph, i32 0, i32 2 ; <i32*> [#uses=1]
+ %9 = getelementptr %struct.mbuf* %m.0.ph, i32 0, i32 3 ; <i8**> [#uses=1]
+ %10 = getelementptr %struct.mbuf* %m.0.ph, i32 0, i32 3 ; <i8**> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb4.outer, %bb3
+ %len.0 = phi i32 [ 0, %bb3 ], [ %len.0.ph, %bb4.outer ] ; <i32> [#uses=6]
+ %11 = icmp sgt i32 %len.0, 0 ; <i1> [#uses=1]
+ %12 = and i1 %11, %6 ; <i1> [#uses=1]
+ br i1 %12, label %bb1, label %bb7
+
+bb1: ; preds = %bb4
+ %13 = load i32* %7, align 4 ; <i32> [#uses=3]
+ %14 = icmp sgt i32 %13, %len.0 ; <i1> [#uses=1]
+ br i1 %14, label %bb3, label %bb2
+
+bb3: ; preds = %bb1
+ %15 = sub i32 %13, %len.0 ; <i32> [#uses=1]
+ store i32 %15, i32* %8, align 4
+ %16 = load i8** %9, align 4 ; <i8*> [#uses=1]
+ %17 = getelementptr i8* %16, i32 %len.0 ; <i8*> [#uses=1]
+ store i8* %17, i8** %10, align 4
+ br label %bb4
+
+bb7: ; preds = %bb4
+ %18 = getelementptr %struct.mbuf* %mp, i32 0, i32 5 ; <i16*> [#uses=1]
+ %19 = load i16* %18, align 2 ; <i16> [#uses=1]
+ %20 = zext i16 %19 to i32 ; <i32> [#uses=1]
+ %21 = and i32 %20, 2 ; <i32> [#uses=1]
+ %22 = icmp eq i32 %21, 0 ; <i1> [#uses=1]
+ br i1 %22, label %return, label %bb8
+
+bb8: ; preds = %bb7
+ %23 = sub i32 %req_len, %len.0 ; <i32> [#uses=1]
+ %24 = getelementptr %struct.mbuf* %mp, i32 0, i32 6 ; <i32*> [#uses=1]
+ store i32 %23, i32* %24, align 4
+ ret void
+
+return: ; preds = %bb7, %entry
+ ret void
+}
diff --git a/test/Transforms/GVN/pre-single-pred.ll b/test/Transforms/GVN/pre-single-pred.ll
index b735ea9827c2..cb71617caed0 100644
--- a/test/Transforms/GVN/pre-single-pred.ll
+++ b/test/Transforms/GVN/pre-single-pred.ll
@@ -1,6 +1,7 @@
; RUN: llvm-as < %s | opt -gvn -enable-load-pre | llvm-dis | not grep {tmp3 = load}
-define i32 @f(i32* nocapture %p, i32 %n) nounwind {
+@p = external global i32
+define i32 @f(i32 %n) nounwind {
entry:
br label %for.cond
@@ -13,9 +14,9 @@ for.cond.for.end_crit_edge: ; preds = %for.cond
br label %for.end
for.body: ; preds = %for.cond
- %tmp3 = load i32* %p ; <i32> [#uses=1]
+ %tmp3 = load i32* @p ; <i32> [#uses=1]
%dec = add i32 %tmp3, -1 ; <i32> [#uses=2]
- store i32 %dec, i32* %p
+ store i32 %dec, i32* @p
%cmp6 = icmp slt i32 %dec, 0 ; <i1> [#uses=1]
br i1 %cmp6, label %for.body.for.end_crit_edge, label %for.inc
@@ -27,6 +28,6 @@ for.inc: ; preds = %for.body
br label %for.cond
for.end: ; preds = %for.body.for.end_crit_edge, %for.cond.for.end_crit_edge
- %tmp9 = load i32* %p ; <i32> [#uses=1]
+ %tmp9 = load i32* @p ; <i32> [#uses=1]
ret i32 %tmp9
}
diff --git a/test/Transforms/IndVarSimplify/ashr-tripcount.ll b/test/Transforms/IndVarSimplify/ashr-tripcount.ll
new file mode 100644
index 000000000000..7b2cad28bf07
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/ashr-tripcount.ll
@@ -0,0 +1,107 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: grep sext %t | count 1
+
+; Indvars should be able to eliminate all of the sign extensions
+; inside the loop.
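+; The i32 IV %i.05 starts at 0 and steps by 4, so it is provably
+; non-negative; indvars can widen it to i64 and drop the per-use
+; sexts, which is why the RUN lines allow a single surviving sext.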
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+@pow_2_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
+@pow_2_025_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
+@i_pow_2_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
+@i_pow_2_025_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
+
+define void @foo(i32 %gain, i32 %noOfLines, i32* %quaSpectrum, float* %iquaSpectrum, float* %pow4_3_tab_ptr) nounwind {
+entry:
+ %t0 = icmp slt i32 %gain, 0 ; <i1> [#uses=1]
+ br i1 %t0, label %bb1, label %bb2
+
+bb1: ; preds = %entry
+ %t1 = sub i32 0, %gain ; <i32> [#uses=1]
+ %t2 = sub i32 0, %gain ; <i32> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb1, %entry
+ %pow_2_tab.pn = phi [0 x float]* [ @i_pow_2_tab, %bb1 ], [ @pow_2_tab, %entry ] ; <[0 x float]*> [#uses=1]
+ %.pn3.in.in = phi i32 [ %t1, %bb1 ], [ %gain, %entry ] ; <i32> [#uses=1]
+ %pow_2_025_tab.pn = phi [0 x float]* [ @i_pow_2_025_tab, %bb1 ], [ @pow_2_025_tab, %entry ] ; <[0 x float]*> [#uses=1]
+ %.pn2.in.in = phi i32 [ %t2, %bb1 ], [ %gain, %entry ] ; <i32> [#uses=1]
+ %.pn3.in = ashr i32 %.pn3.in.in, 2 ; <i32> [#uses=1]
+ %.pn2.in = and i32 %.pn2.in.in, 3 ; <i32> [#uses=1]
+ %.pn3 = sext i32 %.pn3.in to i64 ; <i64> [#uses=1]
+ %.pn2 = zext i32 %.pn2.in to i64 ; <i64> [#uses=1]
+ %.pn.in = getelementptr [0 x float]* %pow_2_tab.pn, i64 0, i64 %.pn3 ; <float*> [#uses=1]
+ %.pn1.in = getelementptr [0 x float]* %pow_2_025_tab.pn, i64 0, i64 %.pn2 ; <float*> [#uses=1]
+ %.pn = load float* %.pn.in ; <float> [#uses=1]
+ %.pn1 = load float* %.pn1.in ; <float> [#uses=1]
+ %invQuantizer.0 = fmul float %.pn, %.pn1 ; <float> [#uses=4]
+ %t3 = ashr i32 %noOfLines, 2 ; <i32> [#uses=1]
+ %t4 = icmp sgt i32 %t3, 0 ; <i1> [#uses=1]
+ br i1 %t4, label %bb.nph, label %return
+
+bb.nph: ; preds = %bb2
+ %t5 = ashr i32 %noOfLines, 2 ; <i32> [#uses=1]
+ br label %bb3
+
+bb3: ; preds = %bb4, %bb.nph
+ %i.05 = phi i32 [ %t49, %bb4 ], [ 0, %bb.nph ] ; <i32> [#uses=9]
+ %k.04 = phi i32 [ %t48, %bb4 ], [ 0, %bb.nph ] ; <i32> [#uses=1]
+ %t6 = sext i32 %i.05 to i64 ; <i64> [#uses=1]
+ %t7 = getelementptr i32* %quaSpectrum, i64 %t6 ; <i32*> [#uses=1]
+ %t8 = load i32* %t7, align 4 ; <i32> [#uses=1]
+ %t9 = zext i32 %t8 to i64 ; <i64> [#uses=1]
+ %t10 = getelementptr float* %pow4_3_tab_ptr, i64 %t9 ; <float*> [#uses=1]
+ %t11 = load float* %t10, align 4 ; <float> [#uses=1]
+ %t12 = or i32 %i.05, 1 ; <i32> [#uses=1]
+ %t13 = sext i32 %t12 to i64 ; <i64> [#uses=1]
+ %t14 = getelementptr i32* %quaSpectrum, i64 %t13 ; <i32*> [#uses=1]
+ %t15 = load i32* %t14, align 4 ; <i32> [#uses=1]
+ %t16 = zext i32 %t15 to i64 ; <i64> [#uses=1]
+ %t17 = getelementptr float* %pow4_3_tab_ptr, i64 %t16 ; <float*> [#uses=1]
+ %t18 = load float* %t17, align 4 ; <float> [#uses=1]
+ %t19 = or i32 %i.05, 2 ; <i32> [#uses=1]
+ %t20 = sext i32 %t19 to i64 ; <i64> [#uses=1]
+ %t21 = getelementptr i32* %quaSpectrum, i64 %t20 ; <i32*> [#uses=1]
+ %t22 = load i32* %t21, align 4 ; <i32> [#uses=1]
+ %t23 = zext i32 %t22 to i64 ; <i64> [#uses=1]
+ %t24 = getelementptr float* %pow4_3_tab_ptr, i64 %t23 ; <float*> [#uses=1]
+ %t25 = load float* %t24, align 4 ; <float> [#uses=1]
+ %t26 = or i32 %i.05, 3 ; <i32> [#uses=1]
+ %t27 = sext i32 %t26 to i64 ; <i64> [#uses=1]
+ %t28 = getelementptr i32* %quaSpectrum, i64 %t27 ; <i32*> [#uses=1]
+ %t29 = load i32* %t28, align 4 ; <i32> [#uses=1]
+ %t30 = zext i32 %t29 to i64 ; <i64> [#uses=1]
+ %t31 = getelementptr float* %pow4_3_tab_ptr, i64 %t30 ; <float*> [#uses=1]
+ %t32 = load float* %t31, align 4 ; <float> [#uses=1]
+ %t33 = fmul float %t11, %invQuantizer.0 ; <float> [#uses=1]
+ %t34 = sext i32 %i.05 to i64 ; <i64> [#uses=1]
+ %t35 = getelementptr float* %iquaSpectrum, i64 %t34 ; <float*> [#uses=1]
+ store float %t33, float* %t35, align 4
+ %t36 = or i32 %i.05, 1 ; <i32> [#uses=1]
+ %t37 = fmul float %t18, %invQuantizer.0 ; <float> [#uses=1]
+ %t38 = sext i32 %t36 to i64 ; <i64> [#uses=1]
+ %t39 = getelementptr float* %iquaSpectrum, i64 %t38 ; <float*> [#uses=1]
+ store float %t37, float* %t39, align 4
+ %t40 = or i32 %i.05, 2 ; <i32> [#uses=1]
+ %t41 = fmul float %t25, %invQuantizer.0 ; <float> [#uses=1]
+ %t42 = sext i32 %t40 to i64 ; <i64> [#uses=1]
+ %t43 = getelementptr float* %iquaSpectrum, i64 %t42 ; <float*> [#uses=1]
+ store float %t41, float* %t43, align 4
+ %t44 = or i32 %i.05, 3 ; <i32> [#uses=1]
+ %t45 = fmul float %t32, %invQuantizer.0 ; <float> [#uses=1]
+ %t46 = sext i32 %t44 to i64 ; <i64> [#uses=1]
+ %t47 = getelementptr float* %iquaSpectrum, i64 %t46 ; <float*> [#uses=1]
+ store float %t45, float* %t47, align 4
+ %t48 = add i32 %k.04, 1 ; <i32> [#uses=2]
+ %t49 = add i32 %i.05, 4 ; <i32> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb3
+ %t50 = icmp sgt i32 %t5, %t48 ; <i1> [#uses=1]
+ br i1 %t50, label %bb3, label %bb4.return_crit_edge
+
+bb4.return_crit_edge: ; preds = %bb4
+ br label %return
+
+return: ; preds = %bb4.return_crit_edge, %bb2
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
index 35fbf52b7ef0..0d17a801a9cc 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
@@ -1,7 +1,9 @@
; RUN: llvm-as < %s | opt -indvars -loop-deletion | llvm-dis | grep phi | count 1
+; XFAIL: *
-; Indvars should be able to evaluate this loop, allowing loop deletion
-; to delete it.
+; Indvars can't evaluate this loop: ScalarEvolution can't compute an
+; exact trip count, because it doesn't know whether dividing by the
+; stride will leave a remainder. It could be done with more aggressive
+; VRP, though.
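+; (For example, with an exit test of the form 'i != n' and a stride of
+; 2, the loop only terminates when the distance to n is even; SCEV
+; cannot establish that here.)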
define i32 @test(i32 %x_offs) nounwind readnone {
entry:
diff --git a/test/Transforms/IndVarSimplify/masked-iv.ll b/test/Transforms/IndVarSimplify/masked-iv.ll
index c7583c9655a8..f77b9357d75a 100644
--- a/test/Transforms/IndVarSimplify/masked-iv.ll
+++ b/test/Transforms/IndVarSimplify/masked-iv.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep trunc | count 1
+; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: not grep trunc %t
+; RUN: grep and %t | count 1
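+; (With this change the trunc is instead expected to fold into an i64
+; 'and' mask, so the output should contain one 'and' and no trunc.)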
; Indvars should do the IV arithmetic in the canonical IV type (i64),
; and only use one truncation.
diff --git a/test/Transforms/IndVarSimplify/pointer.ll b/test/Transforms/IndVarSimplify/pointer.ll
new file mode 100644
index 000000000000..7ad116191925
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pointer.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t
+; RUN: grep {%exitcond = icmp eq i64 %indvar.next, %n} %t
+; RUN: grep {getelementptr i8\\* %A, i64 %indvar} %t
+; RUN: grep getelementptr %t | count 1
+; RUN: grep add %t | count 1
+; RUN: not grep scevgep %t
+; RUN: not grep ptrtoint %t
+
+; Indvars should be able to expand the pointer-arithmetic
+; IV into an integer IV indexing into a simple getelementptr.
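+; That is, the pointer IV %q.01 should end up looking roughly like
+;   %q = getelementptr i8* %A, i64 %indvar
+; driven by a plain i64 counter, rather than being expanded through
+; scevgep/ptrtoint (the greps above pin the exact expected form).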
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+
+define void @foo(i8* %A, i64 %n) nounwind {
+entry:
+ %0 = icmp eq i64 %n, 0 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %1 = getelementptr i8* %A, i64 %n ; <i8*> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb1, %bb.nph
+ %q.01 = phi i8* [ %2, %bb1 ], [ %A, %bb.nph ] ; <i8*> [#uses=2]
+ store i8 0, i8* %q.01, align 1
+ %2 = getelementptr i8* %q.01, i64 1 ; <i8*> [#uses=2]
+ br label %bb1
+
+bb1: ; preds = %bb
+ %3 = icmp eq i8* %1, %2 ; <i1> [#uses=1]
+ br i1 %3, label %bb1.return_crit_edge, label %bb
+
+bb1.return_crit_edge: ; preds = %bb1
+ br label %return
+
+return: ; preds = %bb1.return_crit_edge, %entry
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/shrunk-constant.ll b/test/Transforms/IndVarSimplify/shrunk-constant.ll
new file mode 100644
index 000000000000..0b2ecaf38d77
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/shrunk-constant.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \
+; RUN: | grep {\\--> (zext i4 {-7,+,-8}<loop> to i32)}
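+; %i steps by 8, so %t0 = %i + 9 takes the values 9, 17, 25, ...,
+; whose low four bits follow the i4 recurrence {-7,+,-8} (9, 1, 9, ...).
+; The 'and' with 9 leaves those values unchanged, so ScalarEvolution
+; can shrink %t1 to the zext of that i4 expression.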
+
+define fastcc void @foo() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %t2, %loop ]
+ %t0 = add i32 %i, 9
+ %t1 = and i32 %t0, 9
+ store i32 %t1, i32* null
+ %t2 = add i32 %i, 8
+ br label %loop
+}
diff --git a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
index e86fed3c3644..ffb55a792148 100644
--- a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
+++ b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep store
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {store i32 0,}
; PR4366
define void @a() {
diff --git a/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll b/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll
new file mode 100644
index 000000000000..82b223a4ddc9
--- /dev/null
+++ b/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep srem
+; PR3439
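+; srem i32 %x, 2 can produce -1 (for odd negative %x), whose bit 1 is
+; set, so demanded-bits simplification must not drop the srem here.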
+
+define i32 @a(i32 %x) nounwind {
+entry:
+ %rem = srem i32 %x, 2
+ %and = and i32 %rem, 2
+ ret i32 %and
+}
diff --git a/test/Transforms/InstCombine/select-load-call.ll b/test/Transforms/InstCombine/select-load-call.ll
new file mode 100644
index 000000000000..367356e6311a
--- /dev/null
+++ b/test/Transforms/InstCombine/select-load-call.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {ret i32 1}
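+; The call is readonly, so it cannot modify %P or %A; both still hold
+; 1 at the load, and the load through the select folds to 'ret i32 1'.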
+
+declare void @test2()
+
+define i32 @test(i1 %cond, i32 *%P) {
+ %A = alloca i32
+ store i32 1, i32* %P
+ store i32 1, i32* %A
+
+ call void @test2() readonly
+
+ %P2 = select i1 %cond, i32 *%P, i32* %A
+ %V = load i32* %P2
+ ret i32 %V
+}
diff --git a/test/Transforms/InstCombine/trunc-mask-ext.ll b/test/Transforms/InstCombine/trunc-mask-ext.ll
new file mode 100644
index 000000000000..7e3d8446acff
--- /dev/null
+++ b/test/Transforms/InstCombine/trunc-mask-ext.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis > %t
+; RUN: not grep zext %t
+; RUN: not grep sext %t
+
+; Instcombine should be able to eliminate all of these ext casts.
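+; For example, in @foo the chain
+;   zext (and (trunc i64 %a to i32), 15) to i64
+; is equivalent to 'and i64 %a, 15', since the mask already clears
+; every bit the trunc/zext pair would discard or re-add.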
+
+declare void @use(i32)
+
+define i64 @foo(i64 %a) {
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 15
+ %d = zext i32 %c to i64
+ call void @use(i32 %b)
+ ret i64 %d
+}
+define i64 @bar(i64 %a) {
+ %b = trunc i64 %a to i32
+ %c = shl i32 %b, 4
+ %q = ashr i32 %c, 4
+ %d = sext i32 %q to i64
+ call void @use(i32 %b)
+ ret i64 %d
+}
+define i64 @goo(i64 %a) {
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 8
+ %d = zext i32 %c to i64
+ call void @use(i32 %b)
+ ret i64 %d
+}
+define i64 @hoo(i64 %a) {
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 8
+ %x = xor i32 %c, 8
+ %d = zext i32 %x to i64
+ call void @use(i32 %b)
+ ret i64 %d
+}
diff --git a/test/Transforms/InstCombine/vector-casts-0.ll b/test/Transforms/InstCombine/vector-casts-0.ll
new file mode 100644
index 000000000000..ae5b8a9c7c42
--- /dev/null
+++ b/test/Transforms/InstCombine/vector-casts-0.ll
@@ -0,0 +1,55 @@
+; RUN: llvm-as < %s | opt -instcombine
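+; (No grep: this only checks that instcombine handles these vector
+; casts without crashing or asserting.)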
+
+define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
+entry:
+ %val = trunc <2 x i64> %src to <2 x i32> ; <<2 x i32>> [#uses=1]
+ %add = add <2 x i32> %val, <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ store <2 x i32> %add, <2 x i32>* %dst.addr
+ ret void
+}
+
+define <2 x i65> @foo(<2 x i64> %t) {
+ %a = trunc <2 x i64> %t to <2 x i32>
+ %b = zext <2 x i32> %a to <2 x i65>
+ ret <2 x i65> %b
+}
+define <2 x i64> @bar(<2 x i65> %t) {
+ %a = trunc <2 x i65> %t to <2 x i32>
+ %b = zext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+define <2 x i65> @foos(<2 x i64> %t) {
+ %a = trunc <2 x i64> %t to <2 x i32>
+ %b = sext <2 x i32> %a to <2 x i65>
+ ret <2 x i65> %b
+}
+define <2 x i64> @bars(<2 x i65> %t) {
+ %a = trunc <2 x i65> %t to <2 x i32>
+ %b = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+define <2 x i64> @quxs(<2 x i64> %t) {
+ %a = trunc <2 x i64> %t to <2 x i32>
+ %b = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+define <2 x i64> @quxt(<2 x i64> %t) {
+ %a = shl <2 x i64> %t, <i64 32, i64 32>
+ %b = ashr <2 x i64> %a, <i64 32, i64 32>
+ ret <2 x i64> %b
+}
+define <2 x double> @fa(<2 x double> %t) {
+ %a = fptrunc <2 x double> %t to <2 x float>
+ %b = fpext <2 x float> %a to <2 x double>
+ ret <2 x double> %b
+}
+define <2 x double> @fb(<2 x double> %t) {
+ %a = fptoui <2 x double> %t to <2 x i64>
+ %b = uitofp <2 x i64> %a to <2 x double>
+ ret <2 x double> %b
+}
+define <2 x double> @fc(<2 x double> %t) {
+ %a = fptosi <2 x double> %t to <2 x i64>
+ %b = sitofp <2 x i64> %a to <2 x double>
+ ret <2 x double> %b
+}
diff --git a/test/Transforms/InstCombine/vector-casts-1.ll b/test/Transforms/InstCombine/vector-casts-1.ll
new file mode 100644
index 000000000000..a73a84ac1e0d
--- /dev/null
+++ b/test/Transforms/InstCombine/vector-casts-1.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | opt -instcombine > %t
+; RUN: not grep trunc %t
+; RUN: not grep ashr %t
+
+; This turns into a&1 != 0
+define <2 x i1> @a(<2 x i64> %a) {
+ %t = trunc <2 x i64> %a to <2 x i1>
+ ret <2 x i1> %t
+}
+; The ashr turns into an lshr.
+define <2 x i64> @b(<2 x i64> %a) {
+ %b = and <2 x i64> %a, <i64 65535, i64 65535>
+ %t = ashr <2 x i64> %b, <i64 1, i64 1>
+ ret <2 x i64> %t
+}
diff --git a/test/Transforms/JumpThreading/branch-no-const.ll b/test/Transforms/JumpThreading/branch-no-const.ll
new file mode 100644
index 000000000000..0ea2431ca099
--- /dev/null
+++ b/test/Transforms/JumpThreading/branch-no-const.ll
@@ -0,0 +1,21 @@
+; RUN: llvm-as < %s | opt -jump-threading | llvm-dis | not grep phi
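+; On the edge from %bb1, %x is %a, so %A is 'icmp eq i8 %a, %a' = true
+; even though %a is not a constant; threading that edge to %rt leaves
+; %jt with a single predecessor and no phi.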
+
+declare i8 @mcguffin()
+
+define i32 @test(i1 %foo, i8 %b) {
+entry:
+ %a = call i8 @mcguffin()
+ br i1 %foo, label %bb1, label %bb2
+bb1:
+ br label %jt
+bb2:
+ br label %jt
+jt:
+ %x = phi i8 [%a, %bb1], [%b, %bb2]
+ %A = icmp eq i8 %x, %a
+ br i1 %A, label %rt, label %rf
+rt:
+ ret i32 7
+rf:
+ ret i32 8
+}
diff --git a/test/Transforms/JumpThreading/dup-cond.ll b/test/Transforms/JumpThreading/dup-cond.ll
new file mode 100644
index 000000000000..e20d939316a6
--- /dev/null
+++ b/test/Transforms/JumpThreading/dup-cond.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | opt -jump-threading -die | llvm-dis | grep icmp | count 1
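+; %tmp459 recomputes the same compare as %tmp455, so its result is
+; known along both edges into %BB1; jump threading duplicates it away
+; and -die cleans up, leaving a single icmp.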
+
+declare void @f1()
+declare void @f2()
+declare void @f3()
+
+define i32 @test(i32 %A) {
+ %tmp455 = icmp eq i32 %A, 42
+ br i1 %tmp455, label %BB1, label %BB2
+
+BB2:
+ call void @f1()
+ br label %BB1
+
+
+BB1:
+ %tmp459 = icmp eq i32 %A, 42
+ br i1 %tmp459, label %BB3, label %BB4
+
+BB3:
+ call void @f2()
+ ret i32 3
+
+BB4:
+ call void @f3()
+ ret i32 4
+}
+
+
+
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
new file mode 100644
index 000000000000..be1372ded3a7
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | opt -analyze -iv-users -disable-output | grep {Stride i64 {3,+,2}<loop>:}
+
+; The value of %r is dependent on a polynomial iteration expression.
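+; On exit %indvar.next == %n, so %r == n*n. The stride in the RUN
+; line, {3,+,2}<loop>, is the first difference of (i+1)^2:
+;   (i+2)^2 - (i+1)^2 = 2i + 3.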
+
+define i64 @foo(i64 %n) {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %indvar.next = add i64 %indvar, 1
+ %c = icmp eq i64 %indvar.next, %n
+ br i1 %c, label %exit, label %loop
+
+exit:
+ %r = mul i64 %indvar.next, %indvar.next
+ ret i64 %r
+}
diff --git a/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
new file mode 100644
index 000000000000..d0e7ed787284
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
@@ -0,0 +1,557 @@
+; RUN: llvm-as < %s | opt -simplifycfg -disable-output
+; END.
+ %struct..4._102 = type { %struct.QVectorData* }
+ %struct..5._125 = type { %struct.QMapData* }
+ %struct.QAbstractTextDocumentLayout = type { %struct.QObject }
+ %struct.QBasicAtomic = type { i32 }
+ %struct.QFont = type { %struct.QFontPrivate*, i32 }
+ %struct.QFontMetrics = type { %struct.QFontPrivate* }
+ %struct.QFontPrivate = type opaque
+ %"struct.QFragmentMap<QTextBlockData>" = type { %struct.QFragmentMapData }
+ %struct.QFragmentMapData = type { %"struct.QFragmentMapData::._154", i32 }
+ %"struct.QFragmentMapData::._154" = type { %"struct.QFragmentMapData::Header"* }
+ %"struct.QFragmentMapData::Header" = type { i32, i32, i32, i32, i32, i32, i32, i32 }
+ %"struct.QHash<uint,QHashDummyValue>" = type { %"struct.QHash<uint,QHashDummyValue>::._152" }
+ %"struct.QHash<uint,QHashDummyValue>::._152" = type { %struct.QHashData* }
+ %struct.QHashData = type { %"struct.QHashData::Node"*, %"struct.QHashData::Node"**, %struct.QBasicAtomic, i32, i32, i16, i16, i32, i8 }
+ %"struct.QHashData::Node" = type { %"struct.QHashData::Node"*, i32 }
+ %"struct.QList<QObject*>::._92" = type { %struct.QListData }
+ %"struct.QList<QPointer<QObject> >" = type { %"struct.QList<QObject*>::._92" }
+ %struct.QListData = type { %"struct.QListData::Data"* }
+ %"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] }
+ %"struct.QMap<QUrl,QVariant>" = type { %struct..5._125 }
+ %struct.QMapData = type { %"struct.QMapData::Node"*, [12 x %"struct.QMapData::Node"*], %struct.QBasicAtomic, i32, i32, i32, i8 }
+ %"struct.QMapData::Node" = type { %"struct.QMapData::Node"*, [1 x %"struct.QMapData::Node"*] }
+ %struct.QObject = type { i32 (...)**, %struct.QObjectData* }
+ %struct.QObjectData = type { i32 (...)**, %struct.QObject*, %struct.QObject*, %"struct.QList<QPointer<QObject> >", i8, [3 x i8], i32, i32 }
+ %struct.QObjectPrivate = type { %struct.QObjectData, i32, %struct.QObject*, %"struct.QList<QPointer<QObject> >", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %struct.QString }
+ %struct.QPaintDevice = type { i32 (...)**, i16 }
+ %struct.QPainter = type { %struct.QPainterPrivate* }
+ %struct.QPainterPrivate = type opaque
+ %struct.QPointF = type { double, double }
+ %struct.QPrinter = type { %struct.QPaintDevice, %struct.QPrinterPrivate* }
+ %struct.QPrinterPrivate = type opaque
+ %struct.QRectF = type { double, double, double, double }
+ %"struct.QSet<uint>" = type { %"struct.QHash<uint,QHashDummyValue>" }
+ %"struct.QSharedDataPointer<QTextFormatPrivate>" = type { %struct.QTextFormatPrivate* }
+ %struct.QString = type { %"struct.QString::Data"* }
+ %"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] }
+ %struct.QTextBlockFormat = type { %struct.QTextFormat }
+ %struct.QTextBlockGroup = type { %struct.QAbstractTextDocumentLayout }
+ %struct.QTextDocumentConfig = type { %struct.QString }
+ %struct.QTextDocumentPrivate = type { %struct.QObjectPrivate, %struct.QString, %"struct.QVector<QAbstractTextDocumentLayout::Selection>", i1, i32, i32, i1, i32, i32, i32, i32, i1, %struct.QTextFormatCollection, %struct.QTextBlockGroup*, %struct.QAbstractTextDocumentLayout*, %"struct.QFragmentMap<QTextBlockData>", %"struct.QFragmentMap<QTextBlockData>", i32, %"struct.QList<QPointer<QObject> >", %"struct.QList<QPointer<QObject> >", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %struct.QTextDocumentConfig, i1, i1, %struct.QPointF }
+ %struct.QTextFormat = type { %"struct.QSharedDataPointer<QTextFormatPrivate>", i32 }
+ %struct.QTextFormatCollection = type { %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QSet<uint>", %struct.QFont }
+ %struct.QTextFormatPrivate = type opaque
+ %"struct.QVector<QAbstractTextDocumentLayout::Selection>" = type { %struct..4._102 }
+ %struct.QVectorData = type { %struct.QBasicAtomic, i32, i32, i8 }
+
+define void @_ZNK13QTextDocument5printEP8QPrinter(%struct.QAbstractTextDocumentLayout* %this, %struct.QPrinter* %printer) {
+entry:
+ %tmp = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=2]
+ %tmp.upgrd.1 = alloca %struct.QRectF, align 16 ; <%struct.QRectF*> [#uses=5]
+ %tmp2 = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=3]
+ %tmp.upgrd.2 = alloca %struct.QFontMetrics, align 16 ; <%struct.QFontMetrics*> [#uses=4]
+ %tmp.upgrd.3 = alloca %struct.QFont, align 16 ; <%struct.QFont*> [#uses=4]
+ %tmp3 = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=2]
+ %p = alloca %struct.QPainter, align 16 ; <%struct.QPainter*> [#uses=14]
+ %body = alloca %struct.QRectF, align 16 ; <%struct.QRectF*> [#uses=9]
+ %foo = alloca double, align 8
+ %bar = alloca double, align 8
+ %pageNumberPos = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=4]
+ %scaledPageSize = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=6]
+ %printerPageSize = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=3]
+ %fmt = alloca %struct.QTextBlockFormat, align 16 ; <%struct.QTextBlockFormat*> [#uses=5]
+ %font = alloca %struct.QFont, align 16 ; <%struct.QFont*> [#uses=5]
+ %tmp.upgrd.4 = call %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv( %struct.QAbstractTextDocumentLayout* %this ) ; <%struct.QTextDocumentPrivate*> [#uses=5]
+ %tmp.upgrd.5 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1]
+ call void @_ZN8QPainterC1EP12QPaintDevice( %struct.QPainter* %p, %struct.QPaintDevice* %tmp.upgrd.5 )
+ %tmp.upgrd.6 = invoke i1 @_ZNK8QPainter8isActiveEv( %struct.QPainter* %p )
+ to label %invcont unwind label %cleanup329 ; <i1> [#uses=1]
+invcont: ; preds = %entry
+ br i1 %tmp.upgrd.6, label %cond_next, label %cleanup328
+cond_next: ; preds = %invcont
+ %tmp8 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this )
+ to label %invcont7 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=0]
+invcont7: ; preds = %cond_next
+ %tmp10 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1]
+ call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp, double 0.000000e+00, double 0.000000e+00 )
+ call void @_ZN6QRectFC1ERK7QPointFRK6QSizeF( %struct.QRectF* %body, %struct.QPointF* %tmp, %struct.QPointF* %tmp10 )
+ call void @_ZN7QPointFC1Ev( %struct.QPointF* %pageNumberPos )
+ %tmp12 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1]
+ %tmp13 = call i1 @_ZNK6QSizeF7isValidEv( %struct.QPointF* %tmp12 ) ; <i1> [#uses=1]
+ br i1 %tmp13, label %cond_next15, label %bb
+cond_next15: ; preds = %invcont7
+ %tmp17 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1]
+ %tmp.upgrd.7 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %tmp17 ) ; <double> [#uses=1]
+ %tmp18 = fcmp oeq double %tmp.upgrd.7, 0x41DFFFFFFFC00000 ; <i1> [#uses=1]
+ br i1 %tmp18, label %bb, label %cond_next20
+cond_next20: ; preds = %cond_next15
+ br label %bb21
+bb: ; preds = %cond_next15, %invcont7
+ br label %bb21
+bb21: ; preds = %bb, %cond_next20
+ %iftmp.406.0 = phi i1 [ false, %bb ], [ true, %cond_next20 ] ; <i1> [#uses=1]
+ br i1 %iftmp.406.0, label %cond_true24, label %cond_false
+cond_true24: ; preds = %bb21
+ %tmp.upgrd.8 = invoke i32 @_Z13qt_defaultDpiv( )
+ to label %invcont25 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont25: ; preds = %cond_true24
+ %tmp26 = sitofp i32 %tmp.upgrd.8 to double ; <double> [#uses=2]
+ %tmp30 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this )
+ to label %invcont29 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=1]
+invcont29: ; preds = %invcont25
+ %tmp32 = invoke %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv( %struct.QAbstractTextDocumentLayout* %tmp30 )
+ to label %invcont31 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=3]
+invcont31: ; preds = %invcont29
+ %tmp34 = icmp eq %struct.QPaintDevice* %tmp32, null ; <i1> [#uses=1]
+ br i1 %tmp34, label %cond_next42, label %cond_true35
+cond_true35: ; preds = %invcont31
+ %tmp38 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp32 )
+ to label %invcont37 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont37: ; preds = %cond_true35
+ %tmp38.upgrd.9 = sitofp i32 %tmp38 to double ; <double> [#uses=1]
+ %tmp41 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp32 )
+ to label %invcont40 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont40: ; preds = %invcont37
+ %tmp41.upgrd.10 = sitofp i32 %tmp41 to double ; <double> [#uses=1]
+ br label %cond_next42
+cond_next42: ; preds = %invcont40, %invcont31
+ %sourceDpiY.2 = phi double [ %tmp41.upgrd.10, %invcont40 ], [ %tmp26, %invcont31 ] ; <double> [#uses=1]
+ %sourceDpiX.2 = phi double [ %tmp38.upgrd.9, %invcont40 ], [ %tmp26, %invcont31 ] ; <double> [#uses=1]
+ %tmp44 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1]
+ %tmp46 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp44 )
+ to label %invcont45 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont45: ; preds = %cond_next42
+ %tmp46.upgrd.11 = sitofp i32 %tmp46 to double ; <double> [#uses=1]
+ %tmp48 = fdiv double %tmp46.upgrd.11, %sourceDpiX.2 ; <double> [#uses=2]
+ %tmp50 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1]
+ %tmp52 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp50 )
+ to label %invcont51 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont51: ; preds = %invcont45
+ %tmp52.upgrd.12 = sitofp i32 %tmp52 to double ; <double> [#uses=1]
+ %tmp54 = fdiv double %tmp52.upgrd.12, %sourceDpiY.2 ; <double> [#uses=2]
+ invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp48, double %tmp54 )
+ to label %invcont57 unwind label %cleanup329
+invcont57: ; preds = %invcont51
+ %tmp.upgrd.13 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp60 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 0 ; <double*> [#uses=1]
+ %tmp61 = load double* %tmp60 ; <double> [#uses=1]
+ store double %tmp61, double* %tmp.upgrd.13
+ %tmp62 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp63 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 1 ; <double*> [#uses=1]
+ %tmp64 = load double* %tmp63 ; <double> [#uses=1]
+ store double %tmp64, double* %tmp62
+ %tmp65 = call double* @_ZN6QSizeF6rwidthEv( %struct.QPointF* %scaledPageSize ) ; <double*> [#uses=2]
+ %tmp67 = load double* %tmp65 ; <double> [#uses=1]
+ %tmp69 = mul double %tmp67, %tmp48 ; <double> [#uses=1]
+ store double %tmp69, double* %tmp65
+ %tmp71 = call double* @_ZN6QSizeF7rheightEv( %struct.QPointF* %scaledPageSize ) ; <double*> [#uses=2]
+ %tmp73 = load double* %tmp71 ; <double> [#uses=1]
+ %tmp75 = mul double %tmp73, %tmp54 ; <double> [#uses=1]
+ store double %tmp75, double* %tmp71
+ %tmp78 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1]
+ %tmp80 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp78 )
+ to label %invcont79 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont79: ; preds = %invcont57
+ %tmp82 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1]
+ %tmp84 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp82 )
+ to label %invcont83 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont83: ; preds = %invcont79
+ %tmp80.upgrd.14 = sitofp i32 %tmp80 to double ; <double> [#uses=1]
+ %tmp84.upgrd.15 = sitofp i32 %tmp84 to double ; <double> [#uses=1]
+ call void @_ZN6QSizeFC1Edd( %struct.QPointF* %printerPageSize, double %tmp84.upgrd.15, double %tmp80.upgrd.14 )
+ %tmp85 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %printerPageSize ) ; <double> [#uses=1]
+ %tmp86 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %scaledPageSize ) ; <double> [#uses=1]
+ %tmp87 = fdiv double %tmp85, %tmp86 ; <double> [#uses=1]
+ %tmp88 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %printerPageSize ) ; <double> [#uses=1]
+ %tmp89 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %scaledPageSize ) ; <double> [#uses=1]
+ %tmp90 = fdiv double %tmp88, %tmp89 ; <double> [#uses=1]
+ invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp90, double %tmp87 )
+ to label %cond_next194 unwind label %cleanup329
+cond_false: ; preds = %bb21
+ %tmp.upgrd.16 = getelementptr %struct.QAbstractTextDocumentLayout* %this, i32 0, i32 0 ; <%struct.QObject*> [#uses=1]
+ %tmp95 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject( %struct.QAbstractTextDocumentLayout* %this, %struct.QObject* %tmp.upgrd.16 )
+ to label %invcont94 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=9]
+invcont94: ; preds = %cond_false
+ %tmp99 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %tmp95 )
+ to label %invcont98 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=1]
+invcont98: ; preds = %invcont94
+ %tmp101 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
+ to label %invcont100 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=1]
+invcont100: ; preds = %invcont98
+ invoke void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice( %struct.QAbstractTextDocumentLayout* %tmp99, %struct.QPaintDevice* %tmp101 )
+ to label %invcont103 unwind label %cleanup329
+invcont103: ; preds = %invcont100
+ %tmp105 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
+ to label %invcont104 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=1]
+invcont104: ; preds = %invcont103
+ %tmp107 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp105 )
+ to label %invcont106 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont106: ; preds = %invcont104
+ %tmp108 = sitofp i32 %tmp107 to double ; <double> [#uses=1]
+ %tmp109 = mul double %tmp108, 0x3FE93264C993264C ; <double> [#uses=1]
+ %tmp109.upgrd.17 = fptosi double %tmp109 to i32 ; <i32> [#uses=3]
+ %tmp.upgrd.18 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) ; <%struct.QTextBlockGroup*> [#uses=1]
+ invoke void @_ZNK10QTextFrame11frameFormatEv( %struct.QTextBlockFormat* sret %fmt, %struct.QTextBlockGroup* %tmp.upgrd.18 )
+ to label %invcont111 unwind label %cleanup329
+invcont111: ; preds = %invcont106
+ %tmp112 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1]
+ invoke void @_ZN16QTextFrameFormat9setMarginEd( %struct.QTextBlockFormat* %fmt, double %tmp112 )
+ to label %invcont114 unwind label %cleanup192
+invcont114: ; preds = %invcont111
+ %tmp116 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) ; <%struct.QTextBlockGroup*> [#uses=1]
+ invoke void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat( %struct.QTextBlockGroup* %tmp116, %struct.QTextBlockFormat* %fmt )
+ to label %invcont117 unwind label %cleanup192
+invcont117: ; preds = %invcont114
+ %tmp119 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
+ to label %invcont118 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1]
+invcont118: ; preds = %invcont117
+ %tmp121 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp119 )
+ to label %invcont120 unwind label %cleanup192 ; <i32> [#uses=1]
+invcont120: ; preds = %invcont118
+ %tmp121.upgrd.19 = sitofp i32 %tmp121 to double ; <double> [#uses=1]
+ %tmp123 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
+ to label %invcont122 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1]
+invcont122: ; preds = %invcont120
+ %tmp125 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp123 )
+ to label %invcont124 unwind label %cleanup192 ; <i32> [#uses=1]
+invcont124: ; preds = %invcont122
+ %tmp125.upgrd.20 = sitofp i32 %tmp125 to double ; <double> [#uses=1]
+ call void @_ZN6QRectFC1Edddd( %struct.QRectF* %tmp.upgrd.1, double 0.000000e+00, double 0.000000e+00, double %tmp125.upgrd.20, double %tmp121.upgrd.19 )
+ %tmp126 = getelementptr %struct.QRectF* %body, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp127 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp128 = load double* %tmp127 ; <double> [#uses=1]
+ store double %tmp128, double* %tmp126
+ %tmp129 = getelementptr %struct.QRectF* %body, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp130 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp131 = load double* %tmp130 ; <double> [#uses=1]
+ store double %tmp131, double* %tmp129
+ %tmp132 = getelementptr %struct.QRectF* %body, i32 0, i32 2 ; <double*> [#uses=1]
+ %tmp133 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 2 ; <double*> [#uses=1]
+ %tmp134 = load double* %tmp133 ; <double> [#uses=1]
+ store double %tmp134, double* %tmp132
+ %tmp135 = getelementptr %struct.QRectF* %body, i32 0, i32 3 ; <double*> [#uses=1]
+ %tmp136 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 3 ; <double*> [#uses=1]
+ %tmp137 = load double* %tmp136 ; <double> [#uses=1]
+ store double %tmp137, double* %tmp135
+ %tmp138 = call double @_ZNK6QRectF6heightEv( %struct.QRectF* %body ) ; <double> [#uses=1]
+ %tmp139 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1]
+ %tmp140 = sub double %tmp138, %tmp139 ; <double> [#uses=1]
+ %tmp142 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
+ to label %invcont141 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1]
+invcont141: ; preds = %invcont124
+ invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret %tmp.upgrd.3, %struct.QAbstractTextDocumentLayout* %tmp95 )
+ to label %invcont144 unwind label %cleanup192
+invcont144: ; preds = %invcont141
+ invoke void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice( %struct.QFontMetrics* %tmp.upgrd.2, %struct.QFont* %tmp.upgrd.3, %struct.QPaintDevice* %tmp142 )
+ to label %invcont146 unwind label %cleanup173
+invcont146: ; preds = %invcont144
+ %tmp149 = invoke i32 @_ZNK12QFontMetrics6ascentEv( %struct.QFontMetrics* %tmp.upgrd.2 )
+ to label %invcont148 unwind label %cleanup168 ; <i32> [#uses=1]
+invcont148: ; preds = %invcont146
+ %tmp149.upgrd.21 = sitofp i32 %tmp149 to double ; <double> [#uses=1]
+ %tmp150 = add double %tmp140, %tmp149.upgrd.21 ; <double> [#uses=1]
+ %tmp152 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p )
+ to label %invcont151 unwind label %cleanup168 ; <%struct.QPaintDevice*> [#uses=1]
+invcont151: ; preds = %invcont148
+ %tmp154 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp152 )
+ to label %invcont153 unwind label %cleanup168 ; <i32> [#uses=1]
+invcont153: ; preds = %invcont151
+ %tmp155 = mul i32 %tmp154, 5 ; <i32> [#uses=1]
+ %tmp156 = sdiv i32 %tmp155, 72 ; <i32> [#uses=1]
+ %tmp156.upgrd.22 = sitofp i32 %tmp156 to double ; <double> [#uses=1]
+ %tmp157 = add double %tmp150, %tmp156.upgrd.22 ; <double> [#uses=1]
+ %tmp158 = call double @_ZNK6QRectF5widthEv( %struct.QRectF* %body ) ; <double> [#uses=1]
+ %tmp159 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1]
+ %tmp160 = sub double %tmp158, %tmp159 ; <double> [#uses=1]
+ call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp2, double %tmp160, double %tmp157 )
+ %tmp161 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp162 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp163 = load double* %tmp162 ; <double> [#uses=1]
+ store double %tmp163, double* %tmp161
+ %tmp164 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp165 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp166 = load double* %tmp165 ; <double> [#uses=1]
+ store double %tmp166, double* %tmp164
+ invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 )
+ to label %cleanup171 unwind label %cleanup173
+cleanup168: ; preds = %invcont151, %invcont148, %invcont146
+ invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 )
+ to label %cleanup173 unwind label %cleanup173
+cleanup171: ; preds = %invcont153
+ invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 )
+ to label %finally170 unwind label %cleanup192
+cleanup173: ; preds = %cleanup168, %cleanup168, %invcont153, %invcont144
+ invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 )
+ to label %cleanup192 unwind label %cleanup192
+finally170: ; preds = %cleanup171
+ invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret %font, %struct.QAbstractTextDocumentLayout* %tmp95 )
+ to label %invcont177 unwind label %cleanup192
+invcont177: ; preds = %finally170
+ invoke void @_ZN5QFont12setPointSizeEi( %struct.QFont* %font, i32 10 )
+ to label %invcont179 unwind label %cleanup187
+invcont179: ; preds = %invcont177
+ invoke void @_ZN13QTextDocument14setDefaultFontERK5QFont( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QFont* %font )
+ to label %invcont181 unwind label %cleanup187
+invcont181: ; preds = %invcont179
+ call void @_ZNK6QRectF4sizeEv( %struct.QPointF* sret %tmp3, %struct.QRectF* %body )
+ invoke void @_ZN13QTextDocument11setPageSizeERK6QSizeF( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QPointF* %tmp3 )
+ to label %cleanup185 unwind label %cleanup187
+cleanup185: ; preds = %invcont181
+ invoke void @_ZN5QFontD1Ev( %struct.QFont* %font )
+ to label %cleanup190 unwind label %cleanup192
+cleanup187: ; preds = %invcont181, %invcont179, %invcont177
+ invoke void @_ZN5QFontD1Ev( %struct.QFont* %font )
+ to label %cleanup192 unwind label %cleanup192
+cleanup190: ; preds = %cleanup185
+ invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt )
+ to label %cond_next194 unwind label %cleanup329
+cleanup192: ; preds = %cleanup187, %cleanup187, %cleanup185, %finally170, %cleanup173, %cleanup173, %cleanup171, %invcont141, %invcont124, %invcont122, %invcont120, %invcont118, %invcont117, %invcont114, %invcont111
+ invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt )
+ to label %cleanup329 unwind label %cleanup329
+cond_next194: ; preds = %cleanup190, %invcont83
+ %clonedDoc.1 = phi %struct.QAbstractTextDocumentLayout* [ null, %invcont83 ], [ %tmp95, %cleanup190 ] ; <%struct.QAbstractTextDocumentLayout*> [#uses=3]
+ %doc.1 = phi %struct.QAbstractTextDocumentLayout* [ %this, %invcont83 ], [ %tmp95, %cleanup190 ] ; <%struct.QAbstractTextDocumentLayout*> [#uses=2]
+ %tmp197 = invoke i1 @_ZNK8QPrinter13collateCopiesEv( %struct.QPrinter* %printer )
+ to label %invcont196 unwind label %cleanup329 ; <i1> [#uses=1]
+invcont196: ; preds = %cond_next194
+ br i1 %tmp197, label %cond_true200, label %cond_false204
+cond_true200: ; preds = %invcont196
+ %tmp2000 = load double* %foo
+ store double %tmp2000, double* %bar
+ %tmp203 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer )
+ to label %cond_next208 unwind label %cleanup329 ; <i32> [#uses=1]
+cond_false204: ; preds = %invcont196
+ %tmp2001 = load double* %foo
+ store double %tmp2001, double* %bar
+ %tmp207 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer )
+ to label %cond_next208 unwind label %cleanup329 ; <i32> [#uses=1]
+cond_next208:		; preds = %cond_true200, %cond_false204
+ %pageCopies.0 = phi i32 [ %tmp203, %cond_true200 ], [ 1, %cond_false204 ] ; <i32> [#uses=2]
+ %docCopies.0 = phi i32 [ 1, %cond_true200 ], [ %tmp207, %cond_false204 ] ; <i32> [#uses=2]
+ %tmp211 = invoke i32 @_ZNK8QPrinter8fromPageEv( %struct.QPrinter* %printer )
+ to label %invcont210 unwind label %cleanup329 ; <i32> [#uses=3]
+invcont210: ; preds = %cond_next208
+ %tmp214 = invoke i32 @_ZNK8QPrinter6toPageEv( %struct.QPrinter* %printer )
+ to label %invcont213 unwind label %cleanup329 ; <i32> [#uses=3]
+invcont213: ; preds = %invcont210
+ %tmp216 = icmp eq i32 %tmp211, 0 ; <i1> [#uses=1]
+ br i1 %tmp216, label %cond_true217, label %cond_next225
+cond_true217: ; preds = %invcont213
+ %tmp219 = icmp eq i32 %tmp214, 0 ; <i1> [#uses=1]
+ br i1 %tmp219, label %cond_true220, label %cond_next225
+cond_true220: ; preds = %cond_true217
+ %tmp223 = invoke i32 @_ZNK13QTextDocument9pageCountEv( %struct.QAbstractTextDocumentLayout* %doc.1 )
+ to label %invcont222 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont222: ; preds = %cond_true220
+ br label %cond_next225
+cond_next225: ; preds = %invcont222, %cond_true217, %invcont213
+ %toPage.1 = phi i32 [ %tmp223, %invcont222 ], [ %tmp214, %cond_true217 ], [ %tmp214, %invcont213 ] ; <i32> [#uses=2]
+ %fromPage.1 = phi i32 [ 1, %invcont222 ], [ %tmp211, %cond_true217 ], [ %tmp211, %invcont213 ] ; <i32> [#uses=2]
+ %tmp.page = invoke i32 @_ZNK8QPrinter9pageOrderEv( %struct.QPrinter* %printer )
+ to label %invcont227 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont227: ; preds = %cond_next225
+ %tmp228 = icmp eq i32 %tmp.page, 1 ; <i1> [#uses=1]
+ br i1 %tmp228, label %cond_true230, label %cond_next234
+cond_true230: ; preds = %invcont227
+ br label %cond_next234
+cond_next234: ; preds = %cond_true230, %invcont227
+ %ascending.1 = phi i1 [ false, %cond_true230 ], [ true, %invcont227 ] ; <i1> [#uses=1]
+ %toPage.2 = phi i32 [ %fromPage.1, %cond_true230 ], [ %toPage.1, %invcont227 ] ; <i32> [#uses=1]
+ %fromPage.2 = phi i32 [ %toPage.1, %cond_true230 ], [ %fromPage.1, %invcont227 ] ; <i32> [#uses=1]
+ br label %bb309
+bb237: ; preds = %cond_true313, %cond_next293
+ %iftmp.410.4 = phi i1 [ %iftmp.410.5, %cond_true313 ], [ %iftmp.410.1, %cond_next293 ] ; <i1> [#uses=1]
+ %page.4 = phi i32 [ %fromPage.2, %cond_true313 ], [ %page.3, %cond_next293 ] ; <i32> [#uses=4]
+ br label %bb273
+invcont240: ; preds = %cond_true277
+ %tmp242 = icmp eq i32 %tmp241, 2 ; <i1> [#uses=1]
+ br i1 %tmp242, label %bb252, label %cond_next244
+cond_next244: ; preds = %invcont240
+ %tmp247 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer )
+ to label %invcont246 unwind label %cleanup329 ; <i32> [#uses=1]
+invcont246: ; preds = %cond_next244
+ %tmp248 = icmp eq i32 %tmp247, 3 ; <i1> [#uses=1]
+ br i1 %tmp248, label %bb252, label %bb253
+bb252: ; preds = %invcont246, %invcont240
+ br label %bb254
+bb253: ; preds = %invcont246
+ br label %bb254
+bb254: ; preds = %bb253, %bb252
+ %iftmp.410.0 = phi i1 [ true, %bb252 ], [ false, %bb253 ] ; <i1> [#uses=2]
+ br i1 %iftmp.410.0, label %UserCanceled, label %cond_next258
+cond_next258: ; preds = %bb254
+ invoke fastcc void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF( i32 %page.4, %struct.QPainter* %p, %struct.QAbstractTextDocumentLayout* %doc.1, %struct.QRectF* %body, %struct.QPointF* %pageNumberPos )
+ to label %invcont261 unwind label %cleanup329
+invcont261: ; preds = %cond_next258
+ %tmp263 = add i32 %pageCopies.0, -1 ; <i32> [#uses=1]
+ %tmp265 = icmp sgt i32 %tmp263, %j.4 ; <i1> [#uses=1]
+ br i1 %tmp265, label %cond_true266, label %cond_next270
+cond_true266: ; preds = %invcont261
+ %tmp269 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
+ to label %cond_next270 unwind label %cleanup329 ; <i1> [#uses=0]
+cond_next270: ; preds = %cond_true266, %invcont261
+ %tmp272 = add i32 %j.4, 1 ; <i32> [#uses=1]
+ br label %bb273
+bb273: ; preds = %cond_next270, %bb237
+ %iftmp.410.1 = phi i1 [ %iftmp.410.4, %bb237 ], [ %iftmp.410.0, %cond_next270 ] ; <i1> [#uses=2]
+ %j.4 = phi i32 [ 0, %bb237 ], [ %tmp272, %cond_next270 ] ; <i32> [#uses=3]
+ %tmp276 = icmp slt i32 %j.4, %pageCopies.0 ; <i1> [#uses=1]
+ br i1 %tmp276, label %cond_true277, label %bb280
+cond_true277: ; preds = %bb273
+ %tmp241 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer )
+ to label %invcont240 unwind label %cleanup329 ; <i32> [#uses=1]
+bb280: ; preds = %bb273
+ %tmp283 = icmp eq i32 %page.4, %toPage.2 ; <i1> [#uses=1]
+ br i1 %tmp283, label %bb297, label %cond_next285
+cond_next285: ; preds = %bb280
+ br i1 %ascending.1, label %cond_true287, label %cond_false290
+cond_true287: ; preds = %cond_next285
+ %tmp289 = add i32 %page.4, 1 ; <i32> [#uses=1]
+ br label %cond_next293
+cond_false290: ; preds = %cond_next285
+ %tmp292 = add i32 %page.4, -1 ; <i32> [#uses=1]
+ br label %cond_next293
+cond_next293: ; preds = %cond_false290, %cond_true287
+ %page.3 = phi i32 [ %tmp289, %cond_true287 ], [ %tmp292, %cond_false290 ] ; <i32> [#uses=1]
+ %tmp296 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
+ to label %bb237 unwind label %cleanup329 ; <i1> [#uses=0]
+bb297: ; preds = %bb280
+ %tmp299 = add i32 %docCopies.0, -1 ; <i32> [#uses=1]
+ %tmp301 = icmp sgt i32 %tmp299, %i.1 ; <i1> [#uses=1]
+ br i1 %tmp301, label %cond_true302, label %cond_next306
+cond_true302: ; preds = %bb297
+ %tmp305 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer )
+ to label %cond_next306 unwind label %cleanup329 ; <i1> [#uses=0]
+cond_next306: ; preds = %cond_true302, %bb297
+ %tmp308 = add i32 %i.1, 1 ; <i32> [#uses=1]
+ br label %bb309
+bb309: ; preds = %cond_next306, %cond_next234
+ %iftmp.410.5 = phi i1 [ undef, %cond_next234 ], [ %iftmp.410.1, %cond_next306 ] ; <i1> [#uses=1]
+ %i.1 = phi i32 [ 0, %cond_next234 ], [ %tmp308, %cond_next306 ] ; <i32> [#uses=3]
+ %tmp312 = icmp slt i32 %i.1, %docCopies.0 ; <i1> [#uses=1]
+ br i1 %tmp312, label %cond_true313, label %UserCanceled
+cond_true313: ; preds = %bb309
+ br label %bb237
+UserCanceled: ; preds = %bb309, %bb254
+ %tmp318 = icmp eq %struct.QAbstractTextDocumentLayout* %clonedDoc.1, null ; <i1> [#uses=1]
+ br i1 %tmp318, label %cleanup327, label %cond_true319
+cond_true319: ; preds = %UserCanceled
+ %tmp.upgrd.23 = getelementptr %struct.QAbstractTextDocumentLayout* %clonedDoc.1, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1]
+ %tmp.upgrd.24 = load i32 (...)*** %tmp.upgrd.23 ; <i32 (...)**> [#uses=1]
+ %tmp322 = getelementptr i32 (...)** %tmp.upgrd.24, i32 4 ; <i32 (...)**> [#uses=1]
+ %tmp.upgrd.25 = load i32 (...)** %tmp322 ; <i32 (...)*> [#uses=1]
+ %tmp.upgrd.26 = bitcast i32 (...)* %tmp.upgrd.25 to void (%struct.QAbstractTextDocumentLayout*)* ; <void (%struct.QAbstractTextDocumentLayout*)*> [#uses=1]
+ invoke void %tmp.upgrd.26( %struct.QAbstractTextDocumentLayout* %clonedDoc.1 )
+ to label %cleanup327 unwind label %cleanup329
+cleanup327: ; preds = %cond_true319, %UserCanceled
+ call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
+ ret void
+cleanup328: ; preds = %invcont
+ call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
+ ret void
+cleanup329: ; preds = %cond_true319, %cond_true302, %cond_next293, %cond_true277, %cond_true266, %cond_next258, %cond_next244, %cond_next225, %cond_true220, %invcont210, %cond_next208, %cond_false204, %cond_true200, %cond_next194, %cleanup192, %cleanup192, %cleanup190, %invcont106, %invcont104, %invcont103, %invcont100, %invcont98, %invcont94, %cond_false, %invcont83, %invcont79, %invcont57, %invcont51, %invcont45, %cond_next42, %invcont37, %cond_true35, %invcont29, %invcont25, %cond_true24, %cond_next, %entry
+ call void @_ZN8QPainterD1Ev( %struct.QPainter* %p )
+ unwind
+}
+
+declare void @_ZN6QSizeFC1Edd(%struct.QPointF*, double, double)
+
+declare i1 @_ZNK6QSizeF7isValidEv(%struct.QPointF*)
+
+declare double @_ZNK6QSizeF5widthEv(%struct.QPointF*)
+
+declare double @_ZNK6QSizeF6heightEv(%struct.QPointF*)
+
+declare double* @_ZN6QSizeF6rwidthEv(%struct.QPointF*)
+
+declare double* @_ZN6QSizeF7rheightEv(%struct.QPointF*)
+
+declare %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv(%struct.QAbstractTextDocumentLayout*)
+
+declare void @_ZN7QPointFC1Ev(%struct.QPointF*)
+
+declare void @_ZN7QPointFC1Edd(%struct.QPointF*, double, double)
+
+declare void @_ZN16QTextFrameFormat9setMarginEd(%struct.QTextBlockFormat*, double)
+
+declare void @_ZN6QRectFC1Edddd(%struct.QRectF*, double, double, double, double)
+
+declare void @_ZN6QRectFC1ERK7QPointFRK6QSizeF(%struct.QRectF*, %struct.QPointF*, %struct.QPointF*)
+
+declare double @_ZNK6QRectF5widthEv(%struct.QRectF*)
+
+declare double @_ZNK6QRectF6heightEv(%struct.QRectF*)
+
+declare void @_ZNK6QRectF4sizeEv(%struct.QPointF*, %struct.QRectF*)
+
+declare void @_ZN16QTextFrameFormatD1Ev(%struct.QTextBlockFormat*)
+
+declare void @_ZNK10QTextFrame11frameFormatEv(%struct.QTextBlockFormat*, %struct.QTextBlockGroup*)
+
+declare void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat(%struct.QTextBlockGroup*, %struct.QTextBlockFormat*)
+
+declare i32 @_ZNK12QPaintDevice5widthEv(%struct.QPaintDevice*)
+
+declare i32 @_ZNK12QPaintDevice6heightEv(%struct.QPaintDevice*)
+
+declare i32 @_ZNK12QPaintDevice11logicalDpiXEv(%struct.QPaintDevice*)
+
+declare i32 @_ZNK12QPaintDevice11logicalDpiYEv(%struct.QPaintDevice*)
+
+declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject(%struct.QAbstractTextDocumentLayout*, %struct.QObject*)
+
+declare void @_ZN5QFontD1Ev(%struct.QFont*)
+
+declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv(%struct.QAbstractTextDocumentLayout*)
+
+declare %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv(%struct.QAbstractTextDocumentLayout*)
+
+declare i32 @_ZNK13QTextDocument9pageCountEv(%struct.QAbstractTextDocumentLayout*)
+
+declare void @_ZNK13QTextDocument11defaultFontEv(%struct.QFont*, %struct.QAbstractTextDocumentLayout*)
+
+declare void @_ZN13QTextDocument14setDefaultFontERK5QFont(%struct.QAbstractTextDocumentLayout*, %struct.QFont*)
+
+declare void @_ZN13QTextDocument11setPageSizeERK6QSizeF(%struct.QAbstractTextDocumentLayout*, %struct.QPointF*)
+
+declare void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF(i32, %struct.QPainter*, %struct.QAbstractTextDocumentLayout*, %struct.QRectF*, %struct.QPointF*)
+
+declare void @_ZN12QFontMetricsD1Ev(%struct.QFontMetrics*)
+
+declare void @_ZN8QPainterC1EP12QPaintDevice(%struct.QPainter*, %struct.QPaintDevice*)
+
+declare i1 @_ZNK8QPainter8isActiveEv(%struct.QPainter*)
+
+declare i32 @_Z13qt_defaultDpiv()
+
+declare %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv(%struct.QAbstractTextDocumentLayout*)
+
+declare void @_ZN8QPainter5scaleEdd(%struct.QPainter*, double, double)
+
+declare %struct.QPaintDevice* @_ZNK8QPainter6deviceEv(%struct.QPainter*)
+
+declare void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice(%struct.QAbstractTextDocumentLayout*, %struct.QPaintDevice*)
+
+declare void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice(%struct.QFontMetrics*, %struct.QFont*, %struct.QPaintDevice*)
+
+declare i32 @_ZNK12QFontMetrics6ascentEv(%struct.QFontMetrics*)
+
+declare void @_ZN5QFont12setPointSizeEi(%struct.QFont*, i32)
+
+declare i1 @_ZNK8QPrinter13collateCopiesEv(%struct.QPrinter*)
+
+declare i32 @_ZNK8QPrinter9numCopiesEv(%struct.QPrinter*)
+
+declare i32 @_ZNK8QPrinter8fromPageEv(%struct.QPrinter*)
+
+declare i32 @_ZNK8QPrinter6toPageEv(%struct.QPrinter*)
+
+declare i32 @_ZNK8QPrinter9pageOrderEv(%struct.QPrinter*)
+
+declare i32 @_ZNK8QPrinter12printerStateEv(%struct.QPrinter*)
+
+declare i1 @_ZN8QPrinter7newPageEv(%struct.QPrinter*)
+
+declare void @_ZN8QPainterD1Ev(%struct.QPainter*)
diff --git a/test/Transforms/SimplifyLibCalls/2008-12-20-StrcmpMemcmp.ll b/test/Transforms/SimplifyLibCalls/2008-12-20-StrcmpMemcmp.ll
deleted file mode 100644
index d35da8d1efd2..000000000000
--- a/test/Transforms/SimplifyLibCalls/2008-12-20-StrcmpMemcmp.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llvm-as < %s | opt -simplify-libcalls | llvm-dis | grep call.*memcmp
-
-@.str = internal constant [2 x i8] c"x\00"
-
-declare i32 @strcmp(i8* %dest, i8* %src)
-
-define i32 @foo(i8* %x, i8* %y) {
- %A = call i32 @strcmp(i8* %x, i8* getelementptr ([2 x i8]* @.str, i32 0, i32 0))
- ret i32 %A
-}
diff --git a/test/Transforms/TailCallElim/dont_reorder_load.ll b/test/Transforms/TailCallElim/dont_reorder_load.ll
new file mode 100644
index 000000000000..8fbe00838762
--- /dev/null
+++ b/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -0,0 +1,64 @@
+; RUN: llvm-as <%s | opt -tailcallelim | llvm-dis | grep call | count 3
+; PR4323
+
+; Several cases where tail call elimination should not move the load above the
+; call, and thus can't eliminate the tail recursion.
+
+
+@extern_weak_global = extern_weak global i32 ; <i32*> [#uses=1]
+
+
+; This load can't be safely moved above the call because the load is from an
+; extern_weak global and may trap, but the call may unwind before that happens.
+define fastcc i32 @no_tailrecelim_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ unwind
+
+else: ; preds = %entry
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @no_tailrecelim_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32* @extern_weak_global ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
+
+
+; This load can't be safely moved above the call because the function may write to the pointer.
+define fastcc i32 @no_tailrecelim_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ store i32 1, i32* %a_arg;
+ ret i32 0;
+
+else: ; preds = %entry
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @no_tailrecelim_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
+
+; This load can't be safely moved above the call because that would change the
+; order in which the volatile loads are performed.
+define fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ ret i32 0;
+
+else: ; preds = %entry
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = volatile load i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
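+
+; For contrast: if the load in @no_tailrecelim_2 above were hoisted past the
+; recursive call, every frame would read *%a_arg before the innermost frame's
+; "store i32 1" executed, changing the computed sum; that is exactly why the
+; pass must leave all three functions as genuine recursion.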
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
new file mode 100644
index 000000000000..aeb9042bc79f
--- /dev/null
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -0,0 +1,101 @@
+; RUN: llvm-as <%s | opt -tailcallelim | llvm-dis | not grep call
+; PR4323
+
+; Several cases where tail call elimination should move the load above the call,
+; then eliminate the tail recursion.
+
+
+@global = external global i32 ; <i32*> [#uses=1]
+@extern_weak_global = extern_weak global i32 ; <i32*> [#uses=1]
+
+
+; This load can be moved above the call because the function won't write to it
+; and the call has no side effects.
+define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ ret i32 0
+
+else: ; preds = %entry
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
+
+
+; This load can be moved above the call because the function won't write to it
+; and the load provably can't trap.
+define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ ret i32 0
+
+else: ; preds = %entry
+ %nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
+ br i1 %nullcheck, label %unwind, label %recurse
+
+unwind: ; preds = %else
+ unwind
+
+recurse: ; preds = %else
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32* @global ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
+
+
+; This load can be safely moved above the call (even though it's from an
+; extern_weak global) because the call has no side effects.
+define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ ret i32 0
+
+else: ; preds = %entry
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32* @extern_weak_global ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
+
+
+; The second load can be safely moved above the call even though it's from an
+; unknown pointer (which normally means it might trap) because the first load
+; proves it doesn't trap.
+define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ ret i32 0
+
+else: ; preds = %entry
+ %nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
+ br i1 %nullcheck, label %unwind, label %recurse
+
+unwind: ; preds = %else
+ unwind
+
+recurse: ; preds = %else
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %first = load i32* %a_arg ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1]
+ %second = load i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
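+
+; Illustrative sketch only (hand-written, not generated output): after
+; -tailcallelim, @raise_load_1 effectively becomes the loop below, with an
+; accumulator phi and the read-only load hoisted above the point where the
+; recursive call used to be.
+;
+;   tailrecurse:                    ; preds = %else, %entry
+;     %start = phi i32 [ %start_arg, %entry ], [ %tmp7, %else ]
+;     %acc = phi i32 [ 0, %entry ], [ %tmp10, %else ]
+;     %tmp2 = icmp sge i32 %start, %a_len_arg
+;     br i1 %tmp2, label %if, label %else
+;   if:                             ; preds = %tailrecurse
+;     ret i32 %acc
+;   else:                           ; preds = %tailrecurse
+;     %tmp7 = add i32 %start, 1
+;     %tmp9 = load i32* %a_arg      ; load now executes before any call
+;     %tmp10 = add i32 %tmp9, %acc
+;     br label %tailrecurse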
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 5c1ee351a2a0..7191d8035412 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -9,6 +9,7 @@ endif()
add_subdirectory(opt)
add_subdirectory(llvm-as)
add_subdirectory(llvm-dis)
+add_subdirectory(llvm-mc)
add_subdirectory(llc)
add_subdirectory(llvm-ranlib)
diff --git a/tools/Makefile b/tools/Makefile
index b3c015f307cb..5ed090ea0e0e 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -20,7 +20,8 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
llc llvm-ranlib llvm-ar llvm-nm \
llvm-ld llvm-prof llvm-link \
lli gccas gccld llvm-extract llvm-db \
- bugpoint llvm-bcanalyzer llvm-stub llvmc
+ bugpoint llvm-bcanalyzer llvm-stub llvmc \
+ llvm-mc
# Let users override the set of tools to build from the command line.
ifdef ONLY_TOOLS
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 46b1717fca69..8d8fcd2f44dd 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
+#include "llvm/System/Program.h"
#include <cerrno>
#include <cstdlib>
@@ -44,7 +45,6 @@ namespace {
int gold_version = 0;
bool generate_api_file = false;
- const char *gcc_path = NULL;
const char *as_path = NULL;
struct claimed_file {
@@ -103,13 +103,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) {
case LDPT_OPTION:
if (strcmp("generate-api-file", tv->tv_u.tv_string) == 0) {
generate_api_file = true;
- } else if (strncmp("gcc=", tv->tv_u.tv_string, 4) == 0) {
- if (gcc_path) {
- (*message)(LDPL_WARNING, "Path to gcc specified twice. "
- "Discarding %s", tv->tv_u.tv_string);
- } else {
- gcc_path = strdup(tv->tv_u.tv_string + 4);
- }
} else if (strncmp("as=", tv->tv_u.tv_string, 3) == 0) {
if (as_path) {
(*message)(LDPL_WARNING, "Path to as specified twice. "
@@ -352,10 +345,10 @@ ld_plugin_status all_symbols_read_hook(void) {
lto_codegen_set_pic_model(cg, output_type);
lto_codegen_set_debug_model(cg, LTO_DEBUG_MODEL_DWARF);
- if (gcc_path)
- lto_codegen_set_gcc_path(cg, gcc_path);
- if (as_path)
- lto_codegen_set_assembler_path(cg, as_path);
+ if (as_path) {
+ sys::Path p = sys::Program::FindProgramByName(as_path);
+ lto_codegen_set_assembler_path(cg, p.c_str());
+ }
size_t bufsize = 0;
const char *buffer = static_cast<const char *>(lto_codegen_compile(cg,
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index c630331d684c..e71b37832363 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -38,6 +38,7 @@
#include "llvm/System/Signals.h"
#include "llvm/Config/config.h"
#include "llvm/LinkAllVMCore.h"
+#include "llvm/Target/TargetSelect.h"
#include <fstream>
#include <iostream>
#include <memory>
@@ -214,6 +215,9 @@ int main(int argc, char **argv) {
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
+ InitializeAllTargets();
+ InitializeAllAsmPrinters();
+
// Load the module to be compiled...
std::string ErrorMessage;
std::auto_ptr<Module> M;
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 6d3cbbc1f5fc..afd3c5a71fa4 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/System/Process.h"
#include "llvm/System/Signals.h"
+#include "llvm/Target/TargetSelect.h"
#include <iostream>
#include <cerrno>
using namespace llvm;
@@ -137,6 +138,10 @@ int main(int argc, char **argv, char * const *envp) {
case '2': OLvl = CodeGenOpt::Default; break;
case '3': OLvl = CodeGenOpt::Aggressive; break;
}
+
+ // If we have a native target, initialize it to ensure it is linked in and
+ // usable by the JIT.
+ InitializeNativeTarget();
EE = ExecutionEngine::create(MP, ForceInterpreter, &ErrorMsg, OLvl);
if (!EE && !ErrorMsg.empty()) {
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
new file mode 100644
index 000000000000..0828594a35b5
--- /dev/null
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -0,0 +1,258 @@
+//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AsmLexer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
+ CurBuffer = 0;
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = CurBuf->getBufferStart();
+ TokStart = 0;
+}
+
+SMLoc AsmLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
+void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg) const {
+ SrcMgr.PrintMessage(Loc, Msg);
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return asmtok::Error.
+asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg);
+ return asmtok::Error;
+}
+
+int AsmLexer::getNextChar() {
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0: {
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr-1 != CurBuf->getBufferEnd())
+ return 0; // Just whitespace.
+
+ // If this is the end of an included file, pop the parent file off the
+ // include stack.
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = ParentIncludeLoc.getPointer();
+ return getNextChar();
+ }
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
+ return EOF;
+ }
+ }
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+asmtok::TokKind AsmLexer::LexIdentifier() {
+ while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
+ *CurPtr == '.' || *CurPtr == '@')
+ ++CurPtr;
+ CurStrVal.assign(TokStart, CurPtr);
+ return asmtok::Identifier;
+}
+
+/// LexPercent: Register: %[a-zA-Z0-9]+
+asmtok::TokKind AsmLexer::LexPercent() {
+ if (!isalnum(*CurPtr))
+ return ReturnError(TokStart, "invalid register name");
+ while (isalnum(*CurPtr))
+ ++CurPtr;
+ CurStrVal.assign(TokStart, CurPtr); // Includes the leading '%'.
+ return asmtok::Register;
+}
+
+/// LexSlash: Slash: /
+/// C-Style Comment: /* ... */
+asmtok::TokKind AsmLexer::LexSlash() {
+ if (*CurPtr != '*')
+ return asmtok::Slash;
+
+ // C-style comment.
+ ++CurPtr; // skip the star.
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ return ReturnError(TokStart, "unterminated comment");
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ return LexToken();
+ }
+ }
+}
+
+/// LexHash: Comment: #[^\n]*
+asmtok::TokKind AsmLexer::LexHash() {
+ int CurChar = getNextChar();
+ while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return asmtok::Eof;
+ return asmtok::EndOfStatement;
+}
+
+
+/// LexDigit: First character is [0-9].
+/// Local Label: [0-9][:]
+/// Forward/Backward Label: [0-9][fb]
+/// Binary integer: 0b[01]+
+/// Octal integer: 0[0-7]+
+/// Hex integer: 0x[0-9a-fA-F]+
+/// Decimal integer: [1-9][0-9]*
+/// TODO: FP literal.
+asmtok::TokKind AsmLexer::LexDigit() {
+ if (*CurPtr == ':')
+ return ReturnError(TokStart, "FIXME: local label not implemented");
+ if (*CurPtr == 'f' || *CurPtr == 'b')
+ return ReturnError(TokStart, "FIXME: directional label not implemented");
+
+ // Decimal integer: [1-9][0-9]*
+ if (CurPtr[-1] != '0') {
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ CurIntVal = strtoll(TokStart, 0, 10);
+ return asmtok::IntVal;
+ }
+
+ if (*CurPtr == 'b') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ // Requires at least one binary digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "Invalid binary number");
+ CurIntVal = strtoll(NumStart, 0, 2);
+ return asmtok::IntVal;
+ }
+
+ if (*CurPtr == 'x') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ // Requires at least one hex digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+
+ errno = 0;
+ CurIntVal = strtoll(NumStart, 0, 16);
+ if (errno == EINVAL)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ if (errno == ERANGE) {
+ errno = 0;
+ CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+ if (errno == EINVAL)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ if (errno == ERANGE)
+ return ReturnError(CurPtr-2, "Hexadecimal number out of range");
+ }
+ return asmtok::IntVal;
+ }
+
+ // Must be an octal number, it starts with 0.
+ while (*CurPtr >= '0' && *CurPtr <= '7')
+ ++CurPtr;
+ CurIntVal = strtoll(TokStart, 0, 8);
+ return asmtok::IntVal;
+}
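+
+// Examples of the cases handled above (illustrative comments only):
+//   "42"    -> decimal, CurIntVal = 42
+//   "0b101" -> binary, CurIntVal = 5
+//   "017"   -> octal, CurIntVal = 15
+//   "0x1F"  -> hex, CurIntVal = 31
+//   "1:", "1f", "1b" -> currently rejected with the FIXME errors above.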
+
+/// LexQuote: String: "..."
+asmtok::TokKind AsmLexer::LexQuote() {
+ int CurChar = getNextChar();
+ // TODO: does gas allow multiline string constants?
+ while (CurChar != '"') {
+ if (CurChar == '\\') {
+ // Allow \", etc.
+ CurChar = getNextChar();
+ }
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated string constant");
+
+ CurChar = getNextChar();
+ }
+
+ CurStrVal.assign(TokStart, CurPtr); // include quotes.
+ return asmtok::String;
+}
+
+
+asmtok::TokKind AsmLexer::LexToken() {
+ TokStart = CurPtr;
+ // This always consumes at least one character.
+ int CurChar = getNextChar();
+
+ switch (CurChar) {
+ default:
+ // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+ return LexIdentifier();
+
+ // Unknown character, emit an error.
+ return ReturnError(TokStart, "invalid character in input");
+ case EOF: return asmtok::Eof;
+ case 0:
+ case ' ':
+ case '\t':
+ // Ignore whitespace.
+ return LexToken();
+ case '\n': // FALL THROUGH.
+ case '\r': // FALL THROUGH.
+ case ';': return asmtok::EndOfStatement;
+ case ':': return asmtok::Colon;
+ case '+': return asmtok::Plus;
+ case '-': return asmtok::Minus;
+ case '~': return asmtok::Tilde;
+ case '(': return asmtok::LParen;
+ case ')': return asmtok::RParen;
+ case '*': return asmtok::Star;
+ case ',': return asmtok::Comma;
+ case '$': return asmtok::Dollar;
+ case '%': return LexPercent();
+ case '/': return LexSlash();
+ case '#': return LexHash();
+ case '"': return LexQuote();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return LexDigit();
+
+ // TODO: Quoted identifiers (objc methods etc)
+ // local labels: [0-9][:]
+ // Forward/backward labels: [0-9][fb]
+ // Integers, fp constants, character constants.
+ }
+}
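+
+// Illustrative sketch only, not part of this file: a minimal client loop
+// showing how AsmLexer is meant to be driven. It assumes the SourceMgr
+// already owns the input buffer, as AsmLexer's constructor requires.
+//
+// static void DumpTokens(SourceMgr &SrcMgr) {
+//   AsmLexer Lexer(SrcMgr);
+//   while (Lexer.Lex() != asmtok::Eof) {
+//     if (Lexer.is(asmtok::Error))
+//       break;                      // diagnostic already printed by ReturnError
+//     if (Lexer.is(asmtok::IntVal))
+//       (void)Lexer.getCurIntVal(); // integer tokens carry CurIntVal
+//   }
+// }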
diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h
new file mode 100644
index 000000000000..a6c93230c6cd
--- /dev/null
+++ b/tools/llvm-mc/AsmLexer.h
@@ -0,0 +1,109 @@
+//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class declares the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMLEXER_H
+#define ASMLEXER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <cassert>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class SMLoc;
+
+namespace asmtok {
+ enum TokKind {
+ // Markers
+ Eof, Error,
+
+ // String values.
+ Identifier,
+ Register,
+ String,
+
+ // Integer values.
+ IntVal,
+
+ // No-value.
+ EndOfStatement,
+ Colon,
+ Plus, Minus, Tilde,
+ Slash, // '/'
+ LParen, RParen,
+ Star, Comma, Dollar
+ };
+}
+
+/// AsmLexer - Lexer class for assembly files.
+class AsmLexer {
+ SourceMgr &SrcMgr;
+
+ const char *CurPtr;
+ const MemoryBuffer *CurBuf;
+
+ // Information about the current token.
+ const char *TokStart;
+ asmtok::TokKind CurKind;
+ std::string CurStrVal; // Valid for Identifier, Register, and String.
+ int64_t CurIntVal;
+
+ /// CurBuffer - This is the current buffer index we're lexing from as managed
+ /// by the SourceMgr object.
+ int CurBuffer;
+
+public:
+ AsmLexer(SourceMgr &SrcMgr);
+ ~AsmLexer() {}
+
+ asmtok::TokKind Lex() {
+ return CurKind = LexToken();
+ }
+
+ asmtok::TokKind getKind() const { return CurKind; }
+ bool is(asmtok::TokKind K) const { return CurKind == K; }
+ bool isNot(asmtok::TokKind K) const { return CurKind != K; }
+
+ const std::string &getCurStrVal() const {
+ assert((CurKind == asmtok::Identifier || CurKind == asmtok::Register ||
+ CurKind == asmtok::String) &&
+ "This token doesn't have a string value");
+ return CurStrVal;
+ }
+ int64_t getCurIntVal() const {
+ assert(CurKind == asmtok::IntVal && "This token isn't an integer");
+ return CurIntVal;
+ }
+
+ SMLoc getLoc() const;
+
+ void PrintMessage(SMLoc Loc, const std::string &Msg) const;
+
+private:
+ int getNextChar();
+ asmtok::TokKind ReturnError(const char *Loc, const std::string &Msg);
+
+ /// LexToken - Read the next token and return its code.
+ asmtok::TokKind LexToken();
+ asmtok::TokKind LexIdentifier();
+ asmtok::TokKind LexPercent();
+ asmtok::TokKind LexSlash();
+ asmtok::TokKind LexHash();
+ asmtok::TokKind LexDigit();
+ asmtok::TokKind LexQuote();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp
new file mode 100644
index 000000000000..715ff3932bc6
--- /dev/null
+++ b/tools/llvm-mc/AsmParser.cpp
@@ -0,0 +1,351 @@
+//===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the parser for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool AsmParser::Error(SMLoc L, const char *Msg) {
+ Lexer.PrintMessage(L, Msg);
+ return true;
+}
+
+bool AsmParser::TokError(const char *Msg) {
+ Lexer.PrintMessage(Lexer.getLoc(), Msg);
+ return true;
+}
+
+bool AsmParser::Run() {
+ // Prime the lexer.
+ Lexer.Lex();
+
+ while (Lexer.isNot(asmtok::Eof))
+ if (ParseStatement())
+ return true;
+
+ return false;
+}
+
+/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
+void AsmParser::EatToEndOfStatement() {
+ while (Lexer.isNot(asmtok::EndOfStatement) &&
+ Lexer.isNot(asmtok::Eof))
+ Lexer.Lex();
+
+ // Eat EOL.
+ if (Lexer.is(asmtok::EndOfStatement))
+ Lexer.Lex();
+}
+
+
+struct AsmParser::X86Operand {
+ enum {
+ Register,
+ Immediate,
+ Memory
+ } Kind;
+
+ union {
+ struct {
+ unsigned RegNo;
+ } Reg;
+
+ struct {
+ // FIXME: Should be a general expression.
+ int64_t Val;
+ } Imm;
+
+ struct {
+ unsigned SegReg;
+ int64_t Disp; // FIXME: Should be a general expression.
+ unsigned BaseReg;
+ unsigned Scale;
+ unsigned ScaleReg;
+ } Mem;
+ };
+
+ static X86Operand CreateReg(unsigned RegNo) {
+ X86Operand Res;
+ Res.Kind = Register;
+ Res.Reg.RegNo = RegNo;
+ return Res;
+ }
+ static X86Operand CreateImm(int64_t Val) {
+ X86Operand Res;
+ Res.Kind = Immediate;
+ Res.Imm.Val = Val;
+ return Res;
+ }
+ static X86Operand CreateMem(unsigned SegReg, int64_t Disp, unsigned BaseReg,
+ unsigned Scale, unsigned ScaleReg) {
+ X86Operand Res;
+ Res.Kind = Memory;
+ Res.Mem.SegReg = SegReg;
+ Res.Mem.Disp = Disp;
+ Res.Mem.BaseReg = BaseReg;
+ Res.Mem.Scale = Scale;
+ Res.Mem.ScaleReg = ScaleReg;
+ return Res;
+ }
+};
+
+bool AsmParser::ParseX86Operand(X86Operand &Op) {
+ switch (Lexer.getKind()) {
+ default:
+ return ParseX86MemOperand(Op);
+ case asmtok::Register:
+ // FIXME: Decode reg #.
+ // FIXME: if a segment register, this could either be just the seg reg, or
+ // the start of a memory operand.
+ Op = X86Operand::CreateReg(123);
+ Lexer.Lex(); // Eat register.
+ return false;
+ case asmtok::Dollar: {
+ // $42 -> immediate.
+ Lexer.Lex();
+ int64_t Val;
+ if (ParseExpression(Val))
+ return TokError("expected integer constant");
+ Op = X86Operand::CreateImm(Val);
+ return false;
+ }
+ case asmtok::Star:
+ Lexer.Lex(); // Eat the star.
+
+ if (Lexer.is(asmtok::Register)) {
+ Op = X86Operand::CreateReg(123);
+ Lexer.Lex(); // Eat register.
+ } else if (ParseX86MemOperand(Op))
+ return true;
+
+ // FIXME: Note that these are 'dereferenced' so that clients know the '*' is
+ // there.
+ return false;
+ }
+}
+
+/// ParseX86MemOperand: segment: disp(basereg, indexreg, scale)
+bool AsmParser::ParseX86MemOperand(X86Operand &Op) {
+ // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
+ unsigned SegReg = 0;
+
+
+ // We have to disambiguate a parenthesized expression "(4+5)" from the start
+ // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+ // only way to do this without lookahead is to eat the ( and see what is after
+ // it.
+ int64_t Disp = 0;
+ if (Lexer.isNot(asmtok::LParen)) {
+ if (ParseExpression(Disp)) return true;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (Lexer.isNot(asmtok::LParen)) {
+ Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0);
+ return false;
+ }
+
+ // Eat the '('.
+ Lexer.Lex();
+ } else {
+ // Okay, we have a '('. We don't yet know whether it begins a plain
+ // expression or a memory operand, so we have to eat the '(' to see
+ // beyond it.
+ Lexer.Lex(); // Eat the '('.
+
+ if (Lexer.is(asmtok::Register) || Lexer.is(asmtok::Comma)) {
+ // Nothing to do here, fall into the code below with the '(' part of the
+ // memory operand consumed.
+ } else {
+ // It must be a parenthesized expression; parse it now.
+ if (ParseParenExpr(Disp)) return true;
+
+ // After parsing the base expression we could either have a parenthesized
+ // memory address or not. If not, return now. If so, eat the (.
+ if (Lexer.isNot(asmtok::LParen)) {
+ Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0);
+ return false;
+ }
+
+ // Eat the '('.
+ Lexer.Lex();
+ }
+ }
+
+ // If we reached here, then we just ate the ( of the memory operand. Process
+ // the rest of the memory operand.
+ unsigned BaseReg = 0, ScaleReg = 0, Scale = 0;
+
+ if (Lexer.is(asmtok::Register)) {
+ BaseReg = 123; // FIXME: decode reg #
+ Lexer.Lex(); // eat the register.
+ }
+
+ if (Lexer.is(asmtok::Comma)) {
+ Lexer.Lex(); // eat the comma.
+
+ if (Lexer.is(asmtok::Register)) {
+ ScaleReg = 123; // FIXME: decode reg #
+ Lexer.Lex(); // eat the register.
+ Scale = 1; // If not specified, the scale defaults to 1.
+ }
+
+ if (Lexer.is(asmtok::Comma)) {
+ Lexer.Lex(); // eat the comma.
+
+ // If present, get and validate scale amount.
+ if (Lexer.is(asmtok::IntVal)) {
+ int64_t ScaleVal = Lexer.getCurIntVal();
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8)
+ return TokError("scale factor in address must be 1, 2, 4 or 8");
+ Lexer.Lex(); // eat the scale.
+ Scale = (unsigned)ScaleVal;
+ }
+ }
+ }
+
+ // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
+ if (Lexer.isNot(asmtok::RParen))
+ return TokError("unexpected token in memory operand");
+ Lexer.Lex(); // Eat the ')'.
+
+ Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, Scale, ScaleReg);
+ return false;
+}
+
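+// Worked examples for the routine above (illustrative comments only; register
+// numbers are still the 123 placeholder):
+//   "4(%ebx)"       -> Disp=4, BaseReg set, Scale=0, ScaleReg=0
+//   "(%ebx,%ecx,8)" -> Disp=0, BaseReg set, Scale=8, ScaleReg set
+//   "(,%eax)"       -> Disp=0, BaseReg=0, Scale=1, ScaleReg set
+//   "(4+5)"         -> plain expression; yields Disp=9 with no registers
+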
+/// ParseParenExpr - Parse a paren expression and return it.
+/// NOTE: This assumes the leading '(' has already been consumed.
+///
+/// parenexpr ::= expr)
+///
+bool AsmParser::ParseParenExpr(int64_t &Res) {
+ if (ParseExpression(Res)) return true;
+ if (Lexer.isNot(asmtok::RParen))
+ return TokError("expected ')' in parentheses expression");
+ Lexer.Lex();
+ return false;
+}
+
+/// ParsePrimaryExpr - Parse a primary expression and return it.
+/// primaryexpr ::= (parenexpr
+/// primaryexpr ::= symbol
+/// primaryexpr ::= number
+/// primaryexpr ::= ~,+,- primaryexpr
+bool AsmParser::ParsePrimaryExpr(int64_t &Res) {
+ switch (Lexer.getKind()) {
+ default:
+ return TokError("unknown token in expression");
+ case asmtok::Identifier:
+ // This is a label; it should be parsed as part of an expression, to
+ // handle things like LFOO+4.
+ Res = 0; // FIXME.
+ Lexer.Lex(); // Eat identifier.
+ return false;
+ case asmtok::IntVal:
+ Res = Lexer.getCurIntVal();
+ Lexer.Lex(); // Eat the integer.
+ return false;
+ case asmtok::LParen:
+ Lexer.Lex(); // Eat the '('.
+ return ParseParenExpr(Res);
+ case asmtok::Tilde:
+ case asmtok::Plus:
+ case asmtok::Minus:
+ Lexer.Lex(); // Eat the operator.
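+ // FIXME: The unary operator is eaten here but never applied to Res.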
+ return ParsePrimaryExpr(Res);
+ }
+}
+
+/// ParseExpression - Parse an expression and return it.
+///
+/// expr ::= expr +,- expr -> lowest.
+/// expr ::= expr |,^,&,! expr -> middle.
+/// expr ::= expr *,/,%,<<,>> expr -> highest.
+/// expr ::= primaryexpr
+///
+bool AsmParser::ParseExpression(int64_t &Res) {
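+ // FIXME: Only primary expressions are handled so far; the binary operator
+ // precedence levels documented above are not implemented yet.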
+ return ParsePrimaryExpr(Res);
+}
+
+/// ParseStatement:
+/// ::= EndOfStatement
+/// ::= Label* Directive ...Operands... EndOfStatement
+/// ::= Label* Identifier OperandList* EndOfStatement
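+///
+/// For example, 'foo: movl %eax, %ebx' is a label followed by an instruction.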
+bool AsmParser::ParseStatement() {
+ switch (Lexer.getKind()) {
+ default:
+ return TokError("unexpected token at start of statement");
+ case asmtok::EndOfStatement:
+ Lexer.Lex();
+ return false;
+ case asmtok::Identifier:
+ break;
+ // TODO: Recurse on local labels etc.
+ }
+
+ // If we have an identifier, handle it as the key symbol.
+ SMLoc IDLoc = Lexer.getLoc();
+ std::string IDVal = Lexer.getCurStrVal();
+
+ // Consume the identifier, see what is after it.
+ if (Lexer.Lex() == asmtok::Colon) {
+ // identifier ':' -> Label.
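+ // FIXME: Nothing is recorded for the label yet; it is simply discarded.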
+ Lexer.Lex();
+ return ParseStatement();
+ }
+
+ // Otherwise, we have a normal instruction or directive.
+ if (IDVal[0] == '.') {
+ Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
+ EatToEndOfStatement();
+ return false;
+ }
+
+ // If it's an instruction, parse an operand list.
+ std::vector<X86Operand> Operands;
+
+ // Read the first operand, if present. Note that we require a newline at the
+ // end of file, so we don't have to worry about Eof here.
+ if (Lexer.isNot(asmtok::EndOfStatement)) {
+ X86Operand Op;
+ if (ParseX86Operand(Op))
+ return true;
+ Operands.push_back(Op);
+ }
+
+ while (Lexer.is(asmtok::Comma)) {
+ Lexer.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ X86Operand Op;
+ if (ParseX86Operand(Op))
+ return true;
+ Operands.push_back(Op);
+ }
+
+ if (Lexer.isNot(asmtok::EndOfStatement))
+ return TokError("unexpected token in operand list");
+
+ // Eat the end of statement marker.
+ Lexer.Lex();
+
+ // Instruction is good, process it.
+ outs() << "Found instruction: " << IDVal << " with " << Operands.size()
+ << " operands.\n";
+
+ // Nothing more to do; the statement has already been fully consumed.
+ return false;
+}
diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h
new file mode 100644
index 000000000000..82eb433b61ef
--- /dev/null
+++ b/tools/llvm-mc/AsmParser.h
@@ -0,0 +1,48 @@
+//===- AsmParser.h - Parser for Assembly Files ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class declares the parser for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMPARSER_H
+#define ASMPARSER_H
+
+#include "AsmLexer.h"
+
+namespace llvm {
+
+class AsmParser {
+ AsmLexer Lexer;
+ struct X86Operand;
+
+public:
+ AsmParser(SourceMgr &SM) : Lexer(SM) {}
+ ~AsmParser() {}
+
+ bool Run();
+
+private:
+ bool ParseStatement();
+
+ bool Error(SMLoc L, const char *Msg);
+ bool TokError(const char *Msg);
+
+ void EatToEndOfStatement();
+
+ bool ParseX86Operand(X86Operand &Op);
+ bool ParseX86MemOperand(X86Operand &Op);
+ bool ParseExpression(int64_t &Res);
+ bool ParsePrimaryExpr(int64_t &Res);
+ bool ParseParenExpr(int64_t &Res);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt
new file mode 100644
index 000000000000..d8195e7418c3
--- /dev/null
+++ b/tools/llvm-mc/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_llvm_tool(llvm-mc
+ llvm-mc.cpp
+ AsmLexer.cpp
+ AsmParser.cpp
+ )
diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile
new file mode 100644
index 000000000000..7b4d94445656
--- /dev/null
+++ b/tools/llvm-mc/Makefile
@@ -0,0 +1,17 @@
+##===- tools/llvm-mc/Makefile ------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = llvm-mc
+LINK_COMPONENTS := support
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
new file mode 100644
index 000000000000..52205c48d0fb
--- /dev/null
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -0,0 +1,165 @@
+//===-- llvm-mc.cpp - Machine Code Hacking Driver -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility is a simple driver that allows command line hacking on machine
+// code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
+#include "AsmParser.h"
+using namespace llvm;
+
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+static cl::opt<std::string>
+OutputFilename("o", cl::desc("Output filename"),
+ cl::value_desc("filename"));
+
+static cl::list<std::string>
+IncludeDirs("I", cl::desc("Directory of include files"),
+ cl::value_desc("directory"), cl::Prefix);
+
+enum ActionType {
+ AC_AsLex,
+ AC_Assemble
+};
+
+static cl::opt<ActionType>
+Action(cl::desc("Action to perform:"),
+ cl::init(AC_Assemble),
+ cl::values(clEnumValN(AC_AsLex, "as-lex",
+ "Lex tokens from a .s file"),
+ clEnumValN(AC_Assemble, "assemble",
+ "Assemble a .s file (default)"),
+ clEnumValEnd));
+
+static int AsLexInput(const char *ProgName) {
+ std::string ErrorMessage;
+ MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
+ &ErrorMessage);
+ if (Buffer == 0) {
+ errs() << ProgName << ": ";
+ if (ErrorMessage.size())
+ errs() << ErrorMessage << "\n";
+ else
+ errs() << "input file didn't read correctly.\n";
+ return 1;
+ }
+
+ SourceMgr SrcMgr;
+
+ // Tell SrcMgr about this buffer, which is what the lexer will pick up.
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+ // Record the include directories so that the lexer can find included files
+ // later.
+ SrcMgr.setIncludeDirs(IncludeDirs);
+
+ AsmLexer Lexer(SrcMgr);
+
+ bool Error = false;
+
+ asmtok::TokKind Tok = Lexer.Lex();
+ while (Tok != asmtok::Eof) {
+ switch (Tok) {
+ default:
+ Lexer.PrintMessage(Lexer.getLoc(), "driver: unknown token");
+ Error = true;
+ break;
+ case asmtok::Error:
+ Error = true; // error already printed.
+ break;
+ case asmtok::Identifier:
+ outs() << "identifier: " << Lexer.getCurStrVal() << '\n';
+ break;
+ case asmtok::Register:
+ outs() << "register: " << Lexer.getCurStrVal() << '\n';
+ break;
+ case asmtok::String:
+ outs() << "string: " << Lexer.getCurStrVal() << '\n';
+ break;
+ case asmtok::IntVal:
+ outs() << "int: " << Lexer.getCurIntVal() << '\n';
+ break;
+ case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break;
+ case asmtok::Colon: outs() << "Colon\n"; break;
+ case asmtok::Plus: outs() << "Plus\n"; break;
+ case asmtok::Minus: outs() << "Minus\n"; break;
+ case asmtok::Tilde: outs() << "Tilde\n"; break;
+ case asmtok::Slash: outs() << "Slash\n"; break;
+ case asmtok::LParen: outs() << "LParen\n"; break;
+ case asmtok::RParen: outs() << "RParen\n"; break;
+ case asmtok::Star: outs() << "Star\n"; break;
+ case asmtok::Comma: outs() << "Comma\n"; break;
+ case asmtok::Dollar: outs() << "Dollar\n"; break;
+ }
+
+ Tok = Lexer.Lex();
+ }
+
+ return Error;
+}
+
+static int AssembleInput(const char *ProgName) {
+ std::string ErrorMessage;
+ MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
+ &ErrorMessage);
+ if (Buffer == 0) {
+ errs() << ProgName << ": ";
+ if (ErrorMessage.size())
+ errs() << ErrorMessage << "\n";
+ else
+ errs() << "input file didn't read correctly.\n";
+ return 1;
+ }
+
+ SourceMgr SrcMgr;
+
+ // Tell SrcMgr about this buffer, which is what the parser will pick up.
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+ // Record the include directories so that the lexer can find included files
+ // later.
+ SrcMgr.setIncludeDirs(IncludeDirs);
+
+ AsmParser Parser(SrcMgr);
+ return Parser.Run();
+}
+
+int main(int argc, char **argv) {
+ // Print a stack trace if we signal out.
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+ cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
+
+ switch (Action) {
+ default:
+ case AC_AsLex:
+ return AsLexInput(argv[0]);
+ case AC_Assemble:
+ return AssembleInput(argv[0]);
+ }
+
+ return 0;
+}
+
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index d99fa0cc6f60..7befe8f02d97 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -33,7 +33,7 @@ example, as a build tool for game resources.
Because LLVMC employs TableGen_ as its configuration language, you
need to be familiar with it to customize LLVMC.
-.. _TableGen: http://llvm.cs.uiuc.edu/docs/TableGenFundamentals.html
+.. _TableGen: http://llvm.org/docs/TableGenFundamentals.html
Compiling with LLVMC
@@ -48,12 +48,12 @@ you shouldn't be able to notice them::
$ ./a.out
hello
-One nice feature of LLVMC is that one doesn't have to distinguish
-between different compilers for different languages (think ``g++`` and
-``gcc``) - the right toolchain is chosen automatically based on input
-language names (which are, in turn, determined from file
-extensions). If you want to force files ending with ".c" to compile as
-C++, use the ``-x`` option, just like you would do it with ``gcc``::
+One nice feature of LLVMC is that one doesn't have to distinguish between
+different compilers for different languages (think ``g++`` vs. ``gcc``) - the
+right toolchain is chosen automatically based on input language names (which
+are, in turn, determined from file extensions). If you want to force files
+ending with ".c" to compile as C++, use the ``-x`` option, just like you would
+do it with ``gcc``::
$ # hello.c is really a C++ file
$ llvmc -x c++ hello.c
@@ -94,9 +94,9 @@ configuration libraries:
* ``--check-graph`` - Check the compilation for common errors like mismatched
output/input language names, multiple default edges and cycles. Because of
- plugins, these checks can't be performed at compile-time. Exit with code zero if
- no errors were found, and return the number of found errors otherwise. Hidden
- option, useful for debugging LLVMC plugins.
+ plugins, these checks can't be performed at compile-time. Exit with code zero
+ if no errors were found, and return the number of found errors
+ otherwise. Hidden option, useful for debugging LLVMC plugins.
* ``--view-graph`` - Show a graphical representation of the compilation graph
and exit. Requires that you have ``dot`` and ``gv`` programs installed. Hidden
@@ -104,8 +104,9 @@ configuration libraries:
* ``--write-graph`` - Write a ``compilation-graph.dot`` file in the current
directory with the compilation graph description in Graphviz format (identical
- to the file used by the ``--view-graph`` option). The ``-o`` option can be used
- to set the output file name. Hidden option, useful for debugging LLVMC plugins.
+ to the file used by the ``--view-graph`` option). The ``-o`` option can be
+ used to set the output file name. Hidden option, useful for debugging LLVMC
+ plugins.
* ``--save-temps`` - Write temporary files to the current directory
and do not delete them on exit. Hidden option, useful for debugging.
@@ -113,7 +114,6 @@ configuration libraries:
* ``--help``, ``--help-hidden``, ``--version`` - These options have
their standard meaning.
-
Compiling LLVMC plugins
=======================
@@ -146,29 +146,55 @@ generic::
$ mv Simple.td MyPlugin.td
-Note that the plugin source directory must be placed under
-``$LLVMC_DIR/plugins`` to make use of the existing build
-infrastructure. To build a version of the LLVMC executable called
-``mydriver`` with your plugin compiled in, use the following command::
-
- $ cd $LLVMC_DIR
- $ make BUILTIN_PLUGINS=MyPlugin DRIVER_NAME=mydriver
-
To build your plugin as a dynamic library, just ``cd`` to its source
directory and run ``make``. The resulting file will be called
-``LLVMC$(LLVMC_PLUGIN).$(DLL_EXTENSION)`` (in our case,
-``LLVMCMyPlugin.so``). This library can be then loaded in with the
+``plugin_llvmc_$(LLVMC_PLUGIN).$(DLL_EXTENSION)`` (in our case,
+``plugin_llvmc_MyPlugin.so``). This library can then be loaded with the
``-load`` option. Example::
$ cd $LLVMC_DIR/plugins/Simple
$ make
- $ llvmc -load $LLVM_DIR/Release/lib/LLVMCSimple.so
+ $ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so
+
+Compiling standalone LLVMC-based drivers
+========================================
+
+By default, the ``llvmc`` executable consists of a driver core plus several
+statically linked plugins (``Base`` and ``Clang`` at the moment). You can
+produce a standalone LLVMC-based driver executable by linking the core with your
+own plugins. The recommended way to do this is by starting with the provided
+``Skeleton`` example (``$LLVMC_DIR/example/Skeleton``)::
+
+ $ cd $LLVMC_DIR/example/
+ $ cp -r Skeleton mydriver
+ $ cd mydriver
+ $ vim Makefile
+ [...]
+ $ make
+
+If you're compiling LLVM with different source and object directories, then you
+must perform the following additional steps before running ``make``::
+
+ # LLVMC_SRC_DIR = $LLVM_SRC_DIR/tools/llvmc/
+ # LLVMC_OBJ_DIR = $LLVM_OBJ_DIR/tools/llvmc/
+ $ cp $LLVMC_SRC_DIR/example/mydriver/Makefile \
+ $LLVMC_OBJ_DIR/example/mydriver/
+ $ cd $LLVMC_OBJ_DIR/example/mydriver
+ $ make
+
+Another way to do the same thing is by using the following command::
+
+ $ cd $LLVMC_DIR
+ $ make LLVMC_BUILTIN_PLUGINS=MyPlugin LLVMC_BASED_DRIVER_NAME=mydriver
+
+This works with both srcdir == objdir and srcdir != objdir, but assumes that the
+plugin source directory was placed under ``$LLVMC_DIR/plugins``.
Sometimes, you will want a 'bare-bones' version of LLVMC that has no
built-in plugins. It can be compiled with the following command::
$ cd $LLVMC_DIR
- $ make BUILTIN_PLUGINS=""
+ $ make LLVMC_BUILTIN_PLUGINS=""
Customizing LLVMC: the compilation graph
diff --git a/tools/llvmc/doc/LLVMC-Tutorial.rst b/tools/llvmc/doc/LLVMC-Tutorial.rst
index 6f0647784245..e7e8f081e0f1 100644
--- a/tools/llvmc/doc/LLVMC-Tutorial.rst
+++ b/tools/llvmc/doc/LLVMC-Tutorial.rst
@@ -46,23 +46,28 @@ Using LLVMC to generate toolchain drivers
LLVMC plugins are written mostly using TableGen_, so you need to
be familiar with it to get anything done.
-.. _TableGen: http://llvm.cs.uiuc.edu/docs/TableGenFundamentals.html
+.. _TableGen: http://llvm.org/docs/TableGenFundamentals.html
-Start by compiling ``plugins/Simple/Simple.td``, which is a primitive
-wrapper for ``gcc``::
+Start by compiling ``example/Simple``, which is a primitive wrapper for
+``gcc``::
$ cd $LLVM_DIR/tools/llvmc
- $ make DRIVER_NAME=mygcc BUILTIN_PLUGINS=Simple
+ $ cp -r example/Simple plugins/Simple
+
+ # NB: A less verbose way to compile standalone LLVMC-based drivers is
+ # described in the reference manual.
+
+ $ make LLVMC_BASED_DRIVER_NAME=mygcc LLVMC_BUILTIN_PLUGINS=Simple
$ cat > hello.c
[...]
$ mygcc hello.c
$ ./hello.out
Hello
-Here we link our plugin with the LLVMC core statically to form an
-executable file called ``mygcc``. It is also possible to build our
-plugin as a standalone dynamic library; this is described in the
-reference manual.
+Here we link our plugin with the LLVMC core statically to form an executable
+file called ``mygcc``. It is also possible to build our plugin as a dynamic
+library to be loaded by the ``llvmc`` executable (or any other LLVMC-based
+standalone driver); this is described in the reference manual.
Contents of the file ``Simple.td`` look like this::
diff --git a/tools/llvmc/driver/Makefile b/tools/llvmc/driver/Makefile
index 3dd373a51ed2..5f5ec533dd0d 100644
--- a/tools/llvmc/driver/Makefile
+++ b/tools/llvmc/driver/Makefile
@@ -10,10 +10,10 @@
LEVEL = ../../..
TOOLNAME = $(LLVMC_BASED_DRIVER_NAME)
-LLVMLIBS = CompilerDriver
+LLVMLIBS = CompilerDriver.a
ifneq ($(LLVMC_BUILTIN_PLUGINS),)
-USEDLIBS += $(patsubst %,plugin_llvmc_%,$(LLVMC_BUILTIN_PLUGINS))
+USEDLIBS += $(patsubst %,plugin_llvmc_%.a,$(LLVMC_BUILTIN_PLUGINS))
endif
LINK_COMPONENTS = support system
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 6f8a02867c37..11e0e5551741 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -16,13 +16,18 @@
#include "LTOCodeGenerator.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Linker.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Verifier.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/CodeGen/FileWriters.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -30,25 +35,21 @@
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/CodeGen/FileWriters.h"
#include "llvm/Target/SubtargetFeature.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetSelect.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
+#include <cstdlib>
#include <fstream>
#include <unistd.h>
-#include <stdlib.h>
#include <fcntl.h>
@@ -74,6 +75,8 @@ LTOCodeGenerator::LTOCodeGenerator()
_codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC),
_nativeObjectFile(NULL), _gccPath(NULL), _assemblerPath(NULL)
{
+ InitializeAllTargets();
+ InitializeAllAsmPrinters();
}
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
index feac974be871..00d4038d8ad9 100755
--- a/utils/NewNightlyTest.pl
+++ b/utils/NewNightlyTest.pl
@@ -1,6 +1,7 @@
#!/usr/bin/perl
use POSIX qw(strftime);
use File::Copy;
+use File::Find;
use Socket;
#
@@ -99,6 +100,8 @@ use Socket;
my $HOME = $ENV{'HOME'};
my $SVNURL = $ENV{"SVNURL"};
$SVNURL = 'http://llvm.org/svn/llvm-project' unless $SVNURL;
+my $TestSVNURL = $ENV{"TestSVNURL"};
+$TestSVNURL = 'https://llvm.org/svn/llvm-project' unless $TestSVNURL;
my $CVSRootDir = $ENV{'CVSROOT'};
$CVSRootDir = "/home/vadve/shared/PublicCVS" unless $CVSRootDir;
my $BuildDir = $ENV{'BUILDDIR'};
@@ -537,19 +540,20 @@ if (!$NOCHECKOUT) {
if ( $VERBOSE ) { print "CHECKOUT STAGE:\n"; }
if ($USESVN) {
my $SVNCMD = "$NICE svn co --non-interactive $SVNURL";
+ my $SVNCMD2 = "$NICE svn co --non-interactive $TestSVNURL";
if ($VERBOSE) {
print "( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
- "$SVNCMD/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
+ "$SVNCMD2/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
}
system "( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
- "$SVNCMD/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
- if ($WITHCLANG) {
- my $SVNCMD = "$NICE svn co --non-interactive $SVNURL/cfe/trunk";
- if ($VERBOSE) {
- print "( time -p cd llvm/tools ; $SVNCMD clang ) > $COLog 2>&1\n";
- }
- system "( time -p cd llvm/tools ; $SVNCMD clang ) > $COLog 2>&1\n";
- }
+ "$SVNCMD2/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
+ if ($WITHCLANG) {
+ my $SVNCMD = "$NICE svn co --non-interactive $SVNURL/cfe/trunk";
+ if ($VERBOSE) {
+ print "( time -p cd llvm/tools ; $SVNCMD clang ) > $COLog 2>&1\n";
+ }
+ system "( time -p cd llvm/tools ; $SVNCMD clang ) > $COLog 2>&1\n";
+ }
} else {
my $CVSOPT = "";
$CVSOPT = "-z3" # Use compression if going over ssh.
@@ -784,40 +788,20 @@ if (!$BuildError) {
print "Organizing size of .o and .a files\n"
if ( $VERBOSE );
ChangeDir( "$BuildDir/llvm", "Build Directory" );
- $afiles.= `find utils/ -iname '*.a' -ls`;
- $afiles.= `find lib/ -iname '*.a' -ls`;
- $afiles.= `find tools/ -iname '*.a' -ls`;
- if($BUILDTYPE eq "release"){
- $afiles.= `find Release/ -iname '*.a' -ls`;
- } elsif($BUILDTYPE eq "release-asserts") {
- $afiles.= `find Release-Asserts/ -iname '*.a' -ls`;
- } else {
- $afiles.= `find Debug/ -iname '*.a' -ls`;
- }
- $ofiles.= `find utils/ -iname '*.o' -ls`;
- $ofiles.= `find lib/ -iname '*.o' -ls`;
- $ofiles.= `find tools/ -iname '*.o' -ls`;
+ my @dirs = ('utils', 'lib', 'tools');
if($BUILDTYPE eq "release"){
- $ofiles.= `find Release/ -iname '*.o' -ls`;
+ push @dirs, 'Release';
} elsif($BUILDTYPE eq "release-asserts") {
- $ofiles.= `find Release-Asserts/ -iname '*.o' -ls`;
+ push @dirs, 'Release-Asserts';
} else {
- $ofiles.= `find Debug/ -iname '*.o' -ls`;
+ push @dirs, 'Debug';
}
- @AFILES = split "\n", $afiles;
- $a_file_sizes="";
- foreach $x (@AFILES){
- $x =~ m/.+\s+.+\s+.+\s+.+\s+.+\s+.+\s+(.+)\s+.+\s+.+\s+.+\s+(.+)/;
- $a_file_sizes.="$1 $2 $BUILDTYPE\n";
- }
- @OFILES = split "\n", $ofiles;
- $o_file_sizes="";
- foreach $x (@OFILES){
- $x =~ m/.+\s+.+\s+.+\s+.+\s+.+\s+.+\s+(.+)\s+.+\s+.+\s+.+\s+(.+)/;
- $o_file_sizes.="$1 $2 $BUILDTYPE\n";
- }
+ find(sub {
+ $a_file_sizes .= (-s $_)." $File::Find::name $BUILDTYPE\n" if /\.a$/i;
+ $o_file_sizes .= (-s $_)." $File::Find::name $BUILDTYPE\n" if /\.o$/i;
+ }, @dirs);
} else {
$a_file_sizes="No data due to a bad build.";
$o_file_sizes="No data due to a bad build.";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index c615abad5068..183c6912bae9 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -651,6 +651,8 @@ void AsmWriterEmitter::run(std::ostream &O) {
O << " processDebugLoc(MI->getDebugLoc());\n\n";
+ O << "\n#ifndef NO_ASM_WRITER_BOILERPLATE\n";
+
O << " if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {\n"
<< " O << \"\\t\";\n"
<< " printInlineAsm(MI);\n"
@@ -665,7 +667,9 @@ void AsmWriterEmitter::run(std::ostream &O) {
<< " printImplicitDef(MI);\n"
<< " return true;\n"
<< " }\n\n";
-
+
+ O << "\n#endif\n";
+
O << " O << \"\\t\";\n\n";
O << " // Emit the opcode for the instruction.\n"
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 3f982e0c77ae..6ec1d9967922 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -17,7 +17,6 @@ add_executable(tblgen
SubtargetEmitter.cpp
TGLexer.cpp
TGParser.cpp
- TGSourceMgr.cpp
TGValueTypes.cpp
TableGen.cpp
TableGenBackend.cpp
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index e668468772c6..839059db3aa9 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -720,7 +720,7 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
}
/// getImplicitType - Check to see if the specified record has an implicit
-/// type which should be applied to it. This infer the type of register
+/// type which should be applied to it. This will infer the type of register
/// references from the register file information, for example.
///
static std::vector<unsigned char> getImplicitType(Record *R, bool NotRegisters,
@@ -833,8 +833,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
getEnumName(getTypeNum(0)) + "'!");
}
}
- }
- }
+ }
+ }
}
return MadeChange;
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index ac06cae809db..5f45ea09cafa 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -15,7 +15,7 @@
#ifndef RECORD_H
#define RECORD_H
-#include "TGSourceMgr.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/DataTypes.h"
#include <map>
#include <ostream>
@@ -1214,19 +1214,19 @@ inline std::ostream &operator<<(std::ostream &OS, const RecordVal &RV) {
class Record {
std::string Name;
- TGLoc Loc;
+ SMLoc Loc;
std::vector<std::string> TemplateArgs;
std::vector<RecordVal> Values;
std::vector<Record*> SuperClasses;
public:
- explicit Record(const std::string &N, TGLoc loc) : Name(N), Loc(loc) {}
+ explicit Record(const std::string &N, SMLoc loc) : Name(N), Loc(loc) {}
~Record() {}
const std::string &getName() const { return Name; }
void setName(const std::string &Name); // Also updates RecordKeeper.
- TGLoc getLoc() const { return Loc; }
+ SMLoc getLoc() const { return Loc; }
const std::vector<std::string> &getTemplateArgs() const {
return TemplateArgs;
@@ -1381,7 +1381,7 @@ struct MultiClass {
void dump() const;
- MultiClass(const std::string &Name, TGLoc Loc) : Rec(Name, Loc) {}
+ MultiClass(const std::string &Name, SMLoc Loc) : Rec(Name, Loc) {}
};
class RecordKeeper {
@@ -1461,12 +1461,12 @@ struct LessRecordFieldName {
class TGError {
- TGLoc Loc;
+ SMLoc Loc;
std::string Message;
public:
- TGError(TGLoc loc, const std::string &message) : Loc(loc), Message(message) {}
+ TGError(SMLoc loc, const std::string &message) : Loc(loc), Message(message) {}
- TGLoc getLoc() const { return Loc; }
+ SMLoc getLoc() const { return Loc; }
const std::string &getMessage() const { return Message; }
};
@@ -1475,7 +1475,7 @@ std::ostream &operator<<(std::ostream &OS, const RecordKeeper &RK);
extern RecordKeeper Records;
-void PrintError(TGLoc ErrorLoc, const std::string &Msg);
+void PrintError(SMLoc ErrorLoc, const std::string &Msg);
} // End llvm namespace
diff --git a/utils/TableGen/TGLexer.cpp b/utils/TableGen/TGLexer.cpp
index 758d499a8b5f..6fe8d821e5f3 100644
--- a/utils/TableGen/TGLexer.cpp
+++ b/utils/TableGen/TGLexer.cpp
@@ -12,10 +12,9 @@
//===----------------------------------------------------------------------===//
#include "TGLexer.h"
-#include "TGSourceMgr.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/Streams.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <ostream>
#include "llvm/Config/config.h"
#include <cctype>
#include <cstdio>
@@ -24,15 +23,15 @@
#include <cerrno>
using namespace llvm;
-TGLexer::TGLexer(TGSourceMgr &SM) : SrcMgr(SM) {
+TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
CurBuffer = 0;
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = CurBuf->getBufferStart();
TokStart = 0;
}
-TGLoc TGLexer::getLoc() const {
- return TGLoc::getFromPointer(TokStart);
+SMLoc TGLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
}
@@ -45,11 +44,11 @@ tgtok::TokKind TGLexer::ReturnError(const char *Loc, const std::string &Msg) {
void TGLexer::PrintError(const char *Loc, const std::string &Msg) const {
- SrcMgr.PrintError(TGLoc::getFromPointer(Loc), Msg);
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg);
}
-void TGLexer::PrintError(TGLoc Loc, const std::string &Msg) const {
- SrcMgr.PrintError(Loc, Msg);
+void TGLexer::PrintError(SMLoc Loc, const std::string &Msg) const {
+ SrcMgr.PrintMessage(Loc, Msg);
}
@@ -66,8 +65,8 @@ int TGLexer::getNextChar() {
// If this is the end of an included file, pop the parent file off the
// include stack.
- TGLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
- if (ParentIncludeLoc != TGLoc()) {
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = ParentIncludeLoc.getPointer();
@@ -278,24 +277,15 @@ bool TGLexer::LexInclude() {
// Get the string.
std::string Filename = CurStrVal;
- // Try to find the file.
- MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str());
-
- // If the file didn't exist directly, see if it's in an include path.
- for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
- std::string IncFile = IncludeDirectories[i] + "/" + Filename;
- NewBuf = MemoryBuffer::getFile(IncFile.c_str());
- }
-
- if (NewBuf == 0) {
+
+ CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
+ if (CurBuffer == -1) {
PrintError(getLoc(), "Could not find include file '" + Filename + "'");
return true;
}
// Save the line number and lex buffer of the includer.
- CurBuffer = SrcMgr.AddNewSourceBuffer(NewBuf, TGLoc::getFromPointer(CurPtr));
-
- CurBuf = NewBuf;
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = CurBuf->getBufferStart();
return false;
}
@@ -362,19 +352,19 @@ tgtok::TokKind TGLexer::LexNumber() {
// Requires at least one hex digit.
if (CurPtr == NumStart)
- return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ return ReturnError(TokStart, "Invalid hexadecimal number");
errno = 0;
CurIntVal = strtoll(NumStart, 0, 16);
if (errno == EINVAL)
- return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ return ReturnError(TokStart, "Invalid hexadecimal number");
if (errno == ERANGE) {
errno = 0;
CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
if (errno == EINVAL)
- return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ return ReturnError(TokStart, "Invalid hexadecimal number");
if (errno == ERANGE)
- return ReturnError(CurPtr-2, "Hexadecimal number out of range");
+ return ReturnError(TokStart, "Hexadecimal number out of range");
}
return tgtok::IntVal;
} else if (CurPtr[0] == 'b') {
diff --git a/utils/TableGen/TGLexer.h b/utils/TableGen/TGLexer.h
index ac3b9840039b..80405ac0bf4a 100644
--- a/utils/TableGen/TGLexer.h
+++ b/utils/TableGen/TGLexer.h
@@ -17,13 +17,12 @@
#include "llvm/Support/DataTypes.h"
#include <vector>
#include <string>
-#include <iosfwd>
#include <cassert>
namespace llvm {
class MemoryBuffer;
-class TGSourceMgr;
-class TGLoc;
+class SourceMgr;
+class SMLoc;
namespace tgtok {
enum TokKind {
@@ -58,7 +57,7 @@ namespace tgtok {
/// TGLexer - TableGen Lexer class.
class TGLexer {
- TGSourceMgr &SrcMgr;
+ SourceMgr &SrcMgr;
const char *CurPtr;
const MemoryBuffer *CurBuf;
@@ -73,17 +72,10 @@ class TGLexer {
/// by the SourceMgr object.
int CurBuffer;
- // IncludeDirectories - This is the list of directories we should search for
- // include files in.
- std::vector<std::string> IncludeDirectories;
public:
- TGLexer(TGSourceMgr &SrcMgr);
+ TGLexer(SourceMgr &SrcMgr);
~TGLexer() {}
- void setIncludeDirs(const std::vector<std::string> &Dirs) {
- IncludeDirectories = Dirs;
- }
-
tgtok::TokKind Lex() {
return CurCode = LexToken();
}
@@ -101,10 +93,10 @@ public:
return CurIntVal;
}
- TGLoc getLoc() const;
+ SMLoc getLoc() const;
void PrintError(const char *Loc, const std::string &Msg) const;
- void PrintError(TGLoc Loc, const std::string &Msg) const;
+ void PrintError(SMLoc Loc, const std::string &Msg) const;
private:
/// LexToken - Read the next token and return its code.
diff --git a/utils/TableGen/TGParser.cpp b/utils/TableGen/TGParser.cpp
index cdd285703be0..28ebdb58c0f0 100644
--- a/utils/TableGen/TGParser.cpp
+++ b/utils/TableGen/TGParser.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
namespace llvm {
struct SubClassReference {
- TGLoc RefLoc;
+ SMLoc RefLoc;
Record *Rec;
std::vector<Init*> TemplateArgs;
SubClassReference() : Rec(0) {}
@@ -35,7 +35,7 @@ struct SubClassReference {
};
struct SubMultiClassReference {
- TGLoc RefLoc;
+ SMLoc RefLoc;
MultiClass *MC;
std::vector<Init*> TemplateArgs;
SubMultiClassReference() : MC(0) {}
@@ -60,7 +60,7 @@ void SubMultiClassReference::dump() const {
} // end namespace llvm
-bool TGParser::AddValue(Record *CurRec, TGLoc Loc, const RecordVal &RV) {
+bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
if (CurRec == 0)
CurRec = &CurMultiClass->Rec;
@@ -79,7 +79,7 @@ bool TGParser::AddValue(Record *CurRec, TGLoc Loc, const RecordVal &RV) {
/// SetValue -
/// Return true on error, false on success.
-bool TGParser::SetValue(Record *CurRec, TGLoc Loc, const std::string &ValName,
+bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
const std::vector<unsigned> &BitList, Init *V) {
if (!V) return false;
@@ -527,7 +527,7 @@ bool TGParser::ParseOptionalRangeList(std::vector<unsigned> &Ranges) {
if (Lex.getCode() != tgtok::less)
return false;
- TGLoc StartLoc = Lex.getLoc();
+ SMLoc StartLoc = Lex.getLoc();
Lex.Lex(); // eat the '<'
// Parse the range list.
@@ -549,7 +549,7 @@ bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
if (Lex.getCode() != tgtok::l_brace)
return false;
- TGLoc StartLoc = Lex.getLoc();
+ SMLoc StartLoc = Lex.getLoc();
Lex.Lex(); // eat the '{'
// Parse the range list.
@@ -634,7 +634,7 @@ RecTy *TGParser::ParseType() {
Init *TGParser::ParseIDValue(Record *CurRec) {
assert(Lex.getCode() == tgtok::Id && "Expected ID in ParseIDValue");
std::string Name = Lex.getCurStrVal();
- TGLoc Loc = Lex.getLoc();
+ SMLoc Loc = Lex.getLoc();
Lex.Lex();
return ParseIDValue(CurRec, Name, Loc);
}
@@ -642,7 +642,7 @@ Init *TGParser::ParseIDValue(Record *CurRec) {
/// ParseIDValue - This is just like ParseIDValue above, but it assumes the ID
/// has already been read.
Init *TGParser::ParseIDValue(Record *CurRec,
- const std::string &Name, TGLoc NameLoc) {
+ const std::string &Name, SMLoc NameLoc) {
if (CurRec) {
if (const RecordVal *RV = CurRec->getValue(Name))
return new VarInit(Name, RV->getType());
@@ -1041,7 +1041,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
R = new CodeInit(Lex.getCurStrVal()); Lex.Lex(); break;
case tgtok::question: R = new UnsetInit(); Lex.Lex(); break;
case tgtok::Id: {
- TGLoc NameLoc = Lex.getLoc();
+ SMLoc NameLoc = Lex.getLoc();
std::string Name = Lex.getCurStrVal();
if (Lex.Lex() != tgtok::less) // consume the Id.
return ParseIDValue(CurRec, Name, NameLoc); // Value ::= IDValue
@@ -1087,7 +1087,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
return new DefInit(NewRec);
}
case tgtok::l_brace: { // Value ::= '{' ValueList '}'
- TGLoc BraceLoc = Lex.getLoc();
+ SMLoc BraceLoc = Lex.getLoc();
Lex.Lex(); // eat the '{'
std::vector<Init*> Vals;
@@ -1295,7 +1295,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
switch (Lex.getCode()) {
default: return Result;
case tgtok::l_brace: {
- TGLoc CurlyLoc = Lex.getLoc();
+ SMLoc CurlyLoc = Lex.getLoc();
Lex.Lex(); // eat the '{'
std::vector<unsigned> Ranges = ParseRangeList();
if (Ranges.empty()) return 0;
@@ -1317,7 +1317,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
break;
}
case tgtok::l_square: {
- TGLoc SquareLoc = Lex.getLoc();
+ SMLoc SquareLoc = Lex.getLoc();
Lex.Lex(); // eat the '['
std::vector<unsigned> Ranges = ParseRangeList();
if (Ranges.empty()) return 0;
@@ -1449,7 +1449,7 @@ std::string TGParser::ParseDeclaration(Record *CurRec,
return "";
}
- TGLoc IdLoc = Lex.getLoc();
+ SMLoc IdLoc = Lex.getLoc();
std::string DeclName = Lex.getCurStrVal();
Lex.Lex();
@@ -1470,7 +1470,7 @@ std::string TGParser::ParseDeclaration(Record *CurRec,
// If a value is present, parse it.
if (Lex.getCode() == tgtok::equal) {
Lex.Lex();
- TGLoc ValLoc = Lex.getLoc();
+ SMLoc ValLoc = Lex.getLoc();
Init *Val = ParseValue(CurRec, Type);
if (Val == 0 ||
SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
@@ -1536,7 +1536,7 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
if (Lex.Lex() != tgtok::Id)
return TokError("expected field identifier after let");
- TGLoc IdLoc = Lex.getLoc();
+ SMLoc IdLoc = Lex.getLoc();
std::string FieldName = Lex.getCurStrVal();
Lex.Lex(); // eat the field name.
@@ -1640,7 +1640,7 @@ bool TGParser::ParseObjectBody(Record *CurRec) {
/// DefInst ::= DEF ObjectName ObjectBody
///
llvm::Record *TGParser::ParseDef(MultiClass *CurMultiClass) {
- TGLoc DefLoc = Lex.getLoc();
+ SMLoc DefLoc = Lex.getLoc();
assert(Lex.getCode() == tgtok::Def && "Unknown tok");
Lex.Lex(); // Eat the 'def' token.
@@ -1728,7 +1728,7 @@ std::vector<LetRecord> TGParser::ParseLetList() {
return std::vector<LetRecord>();
}
std::string Name = Lex.getCurStrVal();
- TGLoc NameLoc = Lex.getLoc();
+ SMLoc NameLoc = Lex.getLoc();
Lex.Lex(); // Eat the identifier.
// Check for an optional RangeList.
@@ -1780,7 +1780,7 @@ bool TGParser::ParseTopLevelLet() {
if (ParseObject())
return true;
} else { // Object ::= LETCommand '{' ObjectList '}'
- TGLoc BraceLoc = Lex.getLoc();
+ SMLoc BraceLoc = Lex.getLoc();
// Otherwise, this is a group let.
Lex.Lex(); // eat the '{'.
@@ -1905,7 +1905,7 @@ bool TGParser::ParseDefm() {
if (Lex.Lex() != tgtok::Id) // eat the defm.
return TokError("expected identifier after defm");
- TGLoc DefmPrefixLoc = Lex.getLoc();
+ SMLoc DefmPrefixLoc = Lex.getLoc();
std::string DefmPrefix = Lex.getCurStrVal();
if (Lex.Lex() != tgtok::colon)
return TokError("expected ':' after defm identifier");
@@ -1913,7 +1913,7 @@ bool TGParser::ParseDefm() {
// eat the colon.
Lex.Lex();
- TGLoc SubClassLoc = Lex.getLoc();
+ SMLoc SubClassLoc = Lex.getLoc();
SubClassReference Ref = ParseSubClassReference(0, true);
while (1) {
diff --git a/utils/TableGen/TGParser.h b/utils/TableGen/TGParser.h
index 3af467d16ab0..9f4b63460647 100644
--- a/utils/TableGen/TGParser.h
+++ b/utils/TableGen/TGParser.h
@@ -15,7 +15,7 @@
#define TGPARSER_H
#include "TGLexer.h"
-#include "TGSourceMgr.h"
+#include "llvm/Support/SourceMgr.h"
#include <map>
namespace llvm {
@@ -31,9 +31,9 @@ namespace llvm {
std::string Name;
std::vector<unsigned> Bits;
Init *Value;
- TGLoc Loc;
+ SMLoc Loc;
LetRecord(const std::string &N, const std::vector<unsigned> &B, Init *V,
- TGLoc L)
+ SMLoc L)
: Name(N), Bits(B), Value(V), Loc(L) {
}
};
@@ -47,15 +47,13 @@ class TGParser {
/// current value.
MultiClass *CurMultiClass;
public:
- TGParser(TGSourceMgr &SrcMgr) : Lex(SrcMgr), CurMultiClass(0) {}
+ TGParser(SourceMgr &SrcMgr) : Lex(SrcMgr), CurMultiClass(0) {}
- void setIncludeDirs(const std::vector<std::string> &D){Lex.setIncludeDirs(D);}
-
/// ParseFile - Main entrypoint for parsing a tblgen file. These parser
/// routines return true on error, or false on success.
bool ParseFile();
- bool Error(TGLoc L, const std::string &Msg) const {
+ bool Error(SMLoc L, const std::string &Msg) const {
Lex.PrintError(L, Msg);
return true;
}
@@ -63,8 +61,8 @@ public:
return Error(Lex.getLoc(), Msg);
}
private: // Semantic analysis methods.
- bool AddValue(Record *TheRec, TGLoc Loc, const RecordVal &RV);
- bool SetValue(Record *TheRec, TGLoc Loc, const std::string &ValName,
+ bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
+ bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
const std::vector<unsigned> &BitList, Init *V);
bool AddSubClass(Record *Rec, SubClassReference &SubClass);
bool AddSubMultiClass(MultiClass *CurMC,
@@ -92,7 +90,7 @@ private: // Parser methods.
SubMultiClassReference ParseSubMultiClassReference(MultiClass *CurMC);
Init *ParseIDValue(Record *CurRec);
- Init *ParseIDValue(Record *CurRec, const std::string &Name, TGLoc NameLoc);
+ Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc);
Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0);
Init *ParseValue(Record *CurRec, RecTy *ItemType = 0);
std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0, RecTy *EltTy = 0);
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index dbc4d33e816b..038cde2cb780 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -124,10 +124,10 @@ namespace {
// FIXME: Eliminate globals from tblgen.
RecordKeeper llvm::Records;
-static TGSourceMgr SrcMgr;
+static SourceMgr SrcMgr;
-void llvm::PrintError(TGLoc ErrorLoc, const std::string &Msg) {
- SrcMgr.PrintError(ErrorLoc, Msg);
+void llvm::PrintError(SMLoc ErrorLoc, const std::string &Msg) {
+ SrcMgr.PrintMessage(ErrorLoc, Msg);
}
@@ -136,7 +136,7 @@ void llvm::PrintError(TGLoc ErrorLoc, const std::string &Msg) {
/// file.
static bool ParseFile(const std::string &Filename,
const std::vector<std::string> &IncludeDirs,
- TGSourceMgr &SrcMgr) {
+ SourceMgr &SrcMgr) {
std::string ErrorStr;
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
if (F == 0) {
@@ -145,13 +145,13 @@ static bool ParseFile(const std::string &Filename,
}
// Tell SrcMgr about this buffer, which is what TGParser will pick up.
- SrcMgr.AddNewSourceBuffer(F, TGLoc());
-
- TGParser Parser(SrcMgr);
+ SrcMgr.AddNewSourceBuffer(F, SMLoc());
// Record the location of the include directory so that the lexer can find
// it later.
- Parser.setIncludeDirs(IncludeDirs);
+ SrcMgr.setIncludeDirs(IncludeDirs);
+
+ TGParser Parser(SrcMgr);
return Parser.ParseFile();
}