441 files changed, 7854 insertions, 9909 deletions
diff --git a/Makefile.rules b/Makefile.rules
index 162fd23..98c72c0 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -123,9 +123,6 @@ $(BUILT_SOURCES) : $(ObjMakefiles)
 reconfigure:
 	$(Echo) Reconfiguring $(PROJ_OBJ_ROOT)
 	$(Verb) cd $(PROJ_OBJ_ROOT) && \
-	  if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \
-	    $(RM) $(PROJ_OBJ_ROOT)/config.cache ; \
-	  fi ; \
 	  $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
 	  $(ConfigStatusScript)
 
@@ -133,9 +130,6 @@ reconfigure:
 $(ConfigStatusScript): $(ConfigureScript)
 	$(Echo) Reconfiguring with $<
 	$(Verb) cd $(PROJ_OBJ_ROOT) && \
-	  if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \
-	    $(RM) $(PROJ_OBJ_ROOT)/config.cache ; \
-	  fi ; \
 	  $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
 	  $(ConfigStatusScript)
 
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index e725684..d0427ac 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -36,13 +36,8 @@ if( LLVM_ENABLE_ASSERTIONS )
   # explicitly undefine it:
   if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
     add_definitions( -UNDEBUG )
-    set(LLVM_BUILD_MODE "Release")
-  else()
-    set(LLVM_BUILD_MODE "Debug")
   endif()
-  set(LLVM_BUILD_MODE "${LLVM_BUILD_MODE}+Asserts")
 else()
-  set(LLVM_BUILD_MODE "Release")
   if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
     if( NOT MSVC_IDE AND NOT XCODE )
       add_definitions( -DNDEBUG )
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 97e42cb..a7e268b 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -125,6 +125,13 @@ namespace llvm {
     }
     
     /// @}
+    /// @name Conversion operators
+    /// @{
+    operator std::vector<T>() const {
+      return std::vector<T>(Data, Data+Length);
+    }
+    
+    /// @}
   };
   
   // ArrayRefs can be treated like a POD type.
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index 934cacc..95c973b 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -307,7 +307,7 @@ public:
     return ValueTy();
   }
 
-  ValueTy& operator[](StringRef Key) {
+  ValueTy &operator[](StringRef Key) {
     return GetOrCreateValue(Key).getValue();
   }
 
@@ -355,8 +355,7 @@ public:
   /// exists, return it.  Otherwise, default construct a value, insert it, and
   /// return.
   template <typename InitTy>
-  StringMapEntry<ValueTy> &GetOrCreateValue(StringRef Key,
-                                            InitTy Val) {
+  MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) {
     unsigned BucketNo = LookupBucketFor(Key);
     ItemBucket &Bucket = TheTable[BucketNo];
     if (Bucket.Item && Bucket.Item != getTombstoneVal())
@@ -378,22 +377,10 @@ public:
     return *NewItem;
   }
 
-  StringMapEntry<ValueTy> &GetOrCreateValue(StringRef Key) {
+  MapEntryTy &GetOrCreateValue(StringRef Key) {
     return GetOrCreateValue(Key, ValueTy());
   }
 
-  template <typename InitTy>
-  StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
-                                            const char *KeyEnd,
-                                            InitTy Val) {
-    return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart), Val);
-  }
-
-  StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
-                                            const char *KeyEnd) {
-    return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart));
-  }
-
   /// remove - Remove the specified key/value pair from the map, but do not
   /// erase it.  This aborts if the key is not in the map.
   void remove(MapEntryTy *KeyValue) {
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 1b78fe4..e56d24d 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -37,8 +37,8 @@ class TargetData;
 class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
   friend class IVUsers;
 public:
-  IVStrideUse(IVUsers *P, Instruction* U, Value *O, Value *PN)
-    : CallbackVH(U), Parent(P), OperandValToReplace(O), Phi(PN) {
+  IVStrideUse(IVUsers *P, Instruction* U, Value *O)
+    : CallbackVH(U), Parent(P), OperandValToReplace(O) {
   }
 
   /// getUser - Return the user instruction for this use.
@@ -51,11 +51,6 @@ public:
     setValPtr(NewUser);
   }
 
-  /// getPhi - Return the phi node that represents this IV.
-  PHINode *getPhi() const {
-    return cast<PHINode>(Phi);
-  }
-
   /// getOperandValToReplace - Return the Value of the operand in the user
   /// instruction that this IVStrideUse is representing.
   Value *getOperandValToReplace() const {
@@ -86,9 +81,6 @@ private:
   /// that this IVStrideUse is representing.
   WeakVH OperandValToReplace;
 
-  /// Phi - The loop header phi that represents this IV.
-  WeakVH Phi;
-
   /// PostIncLoops - The set of loops for which Expr has been adjusted to
   /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept.
   PostIncLoopSet PostIncLoops;
@@ -151,9 +143,9 @@ public:
   /// AddUsersIfInteresting - Inspect the specified Instruction.  If it is a
   /// reducible SCEV, recursively add its users to the IVUsesByStride set and
   /// return true.  Otherwise, return false.
-  bool AddUsersIfInteresting(Instruction *I, PHINode *Phi);
+  bool AddUsersIfInteresting(Instruction *I);
 
-  IVStrideUse &AddUser(Instruction *User, Value *Operand, PHINode *Phi);
+  IVStrideUse &AddUser(Instruction *User, Value *Operand);
 
   /// getReplacementExpr - Return a SCEV expression which computes the
   /// value of the OperandValToReplace of the given IVStrideUse.
diff --git a/include/llvm/Assembly/Writer.h b/include/llvm/Assembly/Writer.h
index c5b2390..78c27f0 100644
--- a/include/llvm/Assembly/Writer.h
+++ b/include/llvm/Assembly/Writer.h
@@ -17,46 +17,12 @@
 #ifndef LLVM_ASSEMBLY_WRITER_H
 #define LLVM_ASSEMBLY_WRITER_H
 
-#include <string>
-
 namespace llvm {
 
 class Type;
 class Module;
 class Value;
 class raw_ostream;
-template <typename T> class SmallVectorImpl;
-  
-/// TypePrinting - Type printing machinery.
-class TypePrinting {
-  void *TypeNames;  // A map to remember type names.
-  TypePrinting(const TypePrinting &);   // DO NOT IMPLEMENT
-  void operator=(const TypePrinting&);  // DO NOT IMPLEMENT
-public:
-  TypePrinting();
-  ~TypePrinting();
-  
-  void clear();
-  
-  void print(const Type *Ty, raw_ostream &OS, bool IgnoreTopLevelName = false);
-  
-  void printAtLeastOneLevel(const Type *Ty, raw_ostream &OS) {
-    print(Ty, OS, true);
-  }
-  
-  /// hasTypeName - Return true if the type has a name in TypeNames, false
-  /// otherwise.
-  bool hasTypeName(const Type *Ty) const;
-  
-  /// addTypeName - Add a name for the specified type if it doesn't already have
-  /// one.  This name will be printed instead of the structural version of the
-  /// type in order to make the output more concise.
-  void addTypeName(const Type *Ty, const std::string &N);
-  
-private:
-  void CalcTypeName(const Type *Ty, SmallVectorImpl<const Type *> &TypeStack,
-                    raw_ostream &OS, bool IgnoreTopLevelName = false);
-};
 
 // WriteTypeSymbolic - This attempts to write the specified type as a symbolic
 // type, if there is an entry in the Module's symbol table for the specified
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index 7e7c9e7..b02c249 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -110,7 +110,7 @@ public:
         Function *getParent()       { return Parent; }
 
   /// use_back - Specialize the methods defined in Value, as we know that an
-  /// BasicBlock can only be used by Users (specifically PHI nodes, terminators,
+  /// BasicBlock can only be used by Users (specifically terminators
   /// and BlockAddress's).
   User       *use_back()       { return cast<User>(*use_begin());}
   const User *use_back() const { return cast<User>(*use_begin());}
@@ -248,6 +248,10 @@ public:
   /// other than direct branches, switches, etc. to it.
   bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
 
+  /// replaceSuccessorsPhiUsesWith - Update all phi nodes in all our successors
+  /// to refer to basic block New instead of to us.
+  void replaceSuccessorsPhiUsesWith(BasicBlock *New);
+
 private:
   /// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress
   /// objects using it.  This is almost always 0, sometimes one, possibly but
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 7692bd2..d3fee54 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -112,20 +112,16 @@ namespace bitc {
 
   enum MetadataCodes {
     METADATA_STRING        = 1,   // MDSTRING:      [values]
-    // FIXME: Remove NODE in favor of NODE2 in LLVM 3.0
-    METADATA_NODE          = 2,   // NODE with potentially invalid metadata
-    // FIXME: Remove FN_NODE in favor of FN_NODE2 in LLVM 3.0
-    METADATA_FN_NODE       = 3,   // FN_NODE with potentially invalid metadata
+    // 2 is unused.
+    // 3 is unused.
     METADATA_NAME          = 4,   // STRING:        [values]
-    // FIXME: Remove NAMED_NODE in favor of NAMED_NODE2 in LLVM 3.0
-    METADATA_NAMED_NODE    = 5,   // NAMED_NODE with potentially invalid metadata
+    // 5 is unused.
     METADATA_KIND          = 6,   // [n x [id, name]]
-    // FIXME: Remove ATTACHMENT in favor of ATTACHMENT2 in LLVM 3.0
-    METADATA_ATTACHMENT    = 7,   // ATTACHMENT with potentially invalid metadata
-    METADATA_NODE2         = 8,   // NODE2:         [n x (type num, value num)]
-    METADATA_FN_NODE2      = 9,   // FN_NODE2:      [n x (type num, value num)]
-    METADATA_NAMED_NODE2   = 10,  // NAMED_NODE2:   [n x mdnodes]
-    METADATA_ATTACHMENT2   = 11   // [m x [value, [n x [id, mdnode]]]
+    // 7 is unused.
+    METADATA_NODE          = 8,   // NODE:          [n x (type num, value num)]
+    METADATA_FN_NODE       = 9,   // FN_NODE:       [n x (type num, value num)]
+    METADATA_NAMED_NODE    = 10,  // NAMED_NODE:    [n x mdnodes]
+    METADATA_ATTACHMENT    = 11   // [m x [value, [n x [id, mdnode]]]
   };
   // The constants block (CONSTANTS_BLOCK_ID) describes emission for each
   // constant and maintains an implicit current type value.
@@ -227,21 +223,18 @@ namespace bitc {
     FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE
 
     FUNC_CODE_INST_PHI         = 16, // PHI:        [ty, val0,bb0, ...]
-    FUNC_CODE_INST_MALLOC      = 17, // MALLOC:     [instty, op, align]
-    FUNC_CODE_INST_FREE        = 18, // FREE:       [opty, op]
+    // 17 is unused.
+    // 18 is unused.
     FUNC_CODE_INST_ALLOCA      = 19, // ALLOCA:     [instty, op, align]
     FUNC_CODE_INST_LOAD        = 20, // LOAD:       [opty, op, align, vol]
-    // FIXME: Remove STORE in favor of STORE2 in LLVM 3.0
-    FUNC_CODE_INST_STORE       = 21, // STORE:      [valty,val,ptr, align, vol]
-    // FIXME: Remove CALL in favor of CALL2 in LLVM 3.0
-    FUNC_CODE_INST_CALL        = 22, // CALL with potentially invalid metadata
+    // 21 is unused.
+    // 22 is unused.
     FUNC_CODE_INST_VAARG       = 23, // VAARG:      [valistty, valist, instty]
     // This store code encodes the pointer type, rather than the value type
     // this is so information only available in the pointer type (e.g. address
     // spaces) is retained.
-    FUNC_CODE_INST_STORE2      = 24, // STORE:      [ptrty,ptr,val, align, vol]
-    // FIXME: Remove GETRESULT in favor of EXTRACTVAL in LLVM 3.0
-    FUNC_CODE_INST_GETRESULT   = 25, // GETRESULT:  [ty, opval, n]
+    FUNC_CODE_INST_STORE       = 24, // STORE:      [ptrty,ptr,val, align, vol]
+    // 25 is unused.
     FUNC_CODE_INST_EXTRACTVAL  = 26, // EXTRACTVAL: [n x operands]
     FUNC_CODE_INST_INSERTVAL   = 27, // INSERTVAL:  [n x operands]
     // fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to
@@ -251,14 +244,12 @@ namespace bitc {
     FUNC_CODE_INST_VSELECT     = 29, // VSELECT:    [ty,opval,opval,predty,pred]
     FUNC_CODE_INST_INBOUNDS_GEP= 30, // INBOUNDS_GEP: [n x operands]
     FUNC_CODE_INST_INDIRECTBR  = 31, // INDIRECTBR: [opty, op0, op1, ...]
-    
-    // FIXME: Remove DEBUG_LOC in favor of DEBUG_LOC2 in LLVM 3.0
-    FUNC_CODE_DEBUG_LOC        = 32, // DEBUG_LOC with potentially invalid metadata
+    // 32 is unused.
     FUNC_CODE_DEBUG_LOC_AGAIN  = 33, // DEBUG_LOC_AGAIN
 
-    FUNC_CODE_INST_CALL2       = 34, // CALL2:      [attr, fnty, fnid, args...]
+    FUNC_CODE_INST_CALL        = 34, // CALL:       [attr, fnty, fnid, args...]
 
-    FUNC_CODE_DEBUG_LOC2       = 35  // DEBUG_LOC2: [Line,Col,ScopeVal, IAVal]
+    FUNC_CODE_DEBUG_LOC        = 35  // DEBUG_LOC:  [Line,Col,ScopeVal, IAVal]
   };
 } // End bitc namespace
 } // End llvm namespace
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 5eea099..06c5c83 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -465,8 +465,8 @@ namespace llvm {
     void EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
                             const MachineBasicBlock *MBB,
                             unsigned uid) const;
-    void EmitLLVMUsedList(Constant *List);
-    void EmitXXStructorList(Constant *List);
+    void EmitLLVMUsedList(const Constant *List);
+    void EmitXXStructorList(const Constant *List);
     GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy *C);
   };
 }
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index 8139c65..bce3ec7 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -161,7 +161,8 @@ namespace llvm {
                             PBQP::PBQPNum benefit);
   };
 
-  FunctionPass* createPBQPRegisterAllocator(std::auto_ptr<PBQPBuilder> builder);
+  FunctionPass* createPBQPRegisterAllocator(std::auto_ptr<PBQPBuilder> builder,
+                                            char *customPassID=0);
 }
 
 #endif /* LLVM_CODEGEN_REGALLOCPBQP_H */
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 576be82..44d9477 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -22,7 +22,7 @@ namespace RTLIB {
   /// RTLIB::Libcall enum - This enum defines all of the runtime library calls
   /// the backend can emit.  The various long double types cannot be merged,
   /// because 80-bit library functions use "xf" and 128-bit use "tf".
-  /// 
+  ///
   /// When adding PPCF128 functions here, note that their names generally need
   /// to be overridden for Darwin with the xxx$LDBL128 form.  See
   /// PPCISelLowering.cpp.
@@ -46,6 +46,9 @@ namespace RTLIB {
     MUL_I32,
     MUL_I64,
     MUL_I128,
+    MULO_I32,
+    MULO_I64,
+    MULO_I128,
     SDIV_I8,
     SDIV_I16,
     SDIV_I32,
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 1c42bef..3ec5ada 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -96,8 +96,12 @@ public:
     return DbgValues.empty() && ByvalParmDbgValues.empty();
   }
 
-  SmallVector<SDDbgValue*,2> &getSDDbgValues(const SDNode *Node) {
-    return DbgValMap[Node];
+  ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) {
+    DenseMap<const SDNode*, SmallVector<SDDbgValue*, 2> >::iterator I =
+      DbgValMap.find(Node);
+    if (I != DbgValMap.end())
+      return I->second;
+    return ArrayRef<SDDbgValue*>();
   }
 
   typedef SmallVector<SDDbgValue*,32>::iterator DbgIterator;
@@ -898,7 +902,7 @@ public:
   void AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter);
 
   /// GetDbgValues - Get the debug values which reference the given SDNode.
-  SmallVector<SDDbgValue*,2> &GetDbgValues(const SDNode* SD) {
+  ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) {
     return DbgInfo->getSDDbgValues(SD);
   }
 
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index eabc3a5..ff59951 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -350,9 +350,7 @@ protected:
   ConstantArray(const ArrayType *T, const std::vector<Constant*> &Val);
 public:
   // ConstantArray accessors
-  static Constant *get(const ArrayType *T, const std::vector<Constant*> &V);
-  static Constant *get(const ArrayType *T, Constant *const *Vals, 
-                       unsigned NumVals);
+  static Constant *get(const ArrayType *T, ArrayRef<Constant*> V);
                              
   /// This method constructs a ConstantArray and initializes it with a text
   /// string. The default behavior (AddNull==true) causes a null terminator to
@@ -422,14 +420,29 @@ protected:
   ConstantStruct(const StructType *T, const std::vector<Constant*> &Val);
 public:
   // ConstantStruct accessors
-  static Constant *get(const StructType *T, const std::vector<Constant*> &V);
-  static Constant *get(LLVMContext &Context, 
-                       const std::vector<Constant*> &V, bool Packed);
-  static Constant *get(LLVMContext &Context,
-                       Constant *const *Vals, unsigned NumVals, bool Packed);
-  static Constant *get(LLVMContext &Context, bool Packed,
-                       Constant * Val, ...) END_WITH_NULL;
-
+  static Constant *get(const StructType *T, ArrayRef<Constant*> V);
+  static Constant *get(const StructType *T, ...) END_WITH_NULL;
+
+  /// getAnon - Return an anonymous struct that has the specified
+  /// elements.  If the struct is possibly empty, then you must specify a
+  /// context.
+  static Constant *getAnon(ArrayRef<Constant*> V, bool Packed = false) {
+    return get(getTypeForElements(V, Packed), V);
+  }
+  static Constant *getAnon(LLVMContext &Ctx, 
+                           ArrayRef<Constant*> V, bool Packed = false) {
+    return get(getTypeForElements(Ctx, V, Packed), V);
+  }
+
+  /// getTypeForElements - Return an anonymous struct type to use for a constant
+  /// with the specified set of elements.  The list must not be empty.
+  static StructType *getTypeForElements(ArrayRef<Constant*> V,
+                                        bool Packed = false);
+  /// getTypeForElements - This version of the method allows an empty list.
+  static StructType *getTypeForElements(LLVMContext &Ctx,
+                                        ArrayRef<Constant*> V,
+                                        bool Packed = false);
+  
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
 
@@ -476,8 +489,6 @@ protected:
 public:
   // ConstantVector accessors
   static Constant *get(ArrayRef<Constant*> V);
-  // FIXME: Eliminate this constructor form.
-  static Constant *get(const VectorType *T, const std::vector<Constant*> &V);
   
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index 9e79339..fe9f5f8 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -205,10 +205,10 @@ public:
   /// getTypeAtIndex - Given an index value into the type, return the type of
   /// the element.
   ///
-  virtual const Type *getTypeAtIndex(const Value *V) const = 0;
-  virtual const Type *getTypeAtIndex(unsigned Idx) const = 0;
-  virtual bool indexValid(const Value *V) const = 0;
-  virtual bool indexValid(unsigned Idx) const = 0;
+  const Type *getTypeAtIndex(const Value *V) const;
+  const Type *getTypeAtIndex(unsigned Idx) const;
+  bool indexValid(const Value *V) const;
+  bool indexValid(unsigned Idx) const;
 
   // Methods for support type inquiry through isa, cast, and dyn_cast.
   static inline bool classof(const CompositeType *) { return true; }
@@ -232,19 +232,18 @@ public:
   /// StructType::get - This static method is the primary way to create a
   /// StructType.
   ///
-  static StructType *get(LLVMContext &Context, ArrayRef<const Type*> Params,
+  static StructType *get(LLVMContext &Context, ArrayRef<const Type*> Elements,
                          bool isPacked = false);
 
   /// StructType::get - Create an empty structure type.
   ///
-  static StructType *get(LLVMContext &Context, bool isPacked=false);
+  static StructType *get(LLVMContext &Context, bool isPacked = false);
   
-  /// StructType::get - This static method is a convenience method for
-  /// creating structure types by specifying the elements as arguments.
-  /// Note that this method always returns a non-packed struct.  To get
-  /// an empty struct, pass NULL, NULL.
-  static StructType *get(LLVMContext &Context, 
-                         const Type *type, ...) END_WITH_NULL;
+  /// StructType::get - This static method is a convenience method for creating
+  /// structure types by specifying the elements as arguments.  Note that this
+  /// method always returns a non-packed struct, and requires at least one
+  /// element type.
+  static StructType *get(const Type *elt1, ...) END_WITH_NULL;
 
   /// isValidElementType - Return true if the specified type is valid as a
   /// element type.
@@ -257,6 +256,13 @@ public:
   element_iterator element_begin() const { return ContainedTys; }
   element_iterator element_end() const { return &ContainedTys[NumContainedTys];}
 
+  /// isLayoutIdentical - Return true if this is layout identical to the
+  /// specified struct.
+  bool isLayoutIdentical(const StructType *Other) const {
+    return this == Other;
+  }
+  
+  
   // Random access to the elements
   unsigned getNumElements() const { return NumContainedTys; }
   const Type *getElementType(unsigned N) const {
@@ -264,14 +270,6 @@ public:
     return ContainedTys[N];
   }
 
-  /// getTypeAtIndex - Given an index value into the type, return the type of
-  /// the element.  For a structure type, this must be a constant value...
-  ///
-  virtual const Type *getTypeAtIndex(const Value *V) const;
-  virtual const Type *getTypeAtIndex(unsigned Idx) const;
-  virtual bool indexValid(const Value *V) const;
-  virtual bool indexValid(unsigned Idx) const;
-
   // Implement the AbstractTypeUser interface.
   virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
   virtual void typeBecameConcrete(const DerivedType *AbsTy);
@@ -306,22 +304,7 @@ protected:
   }
 
 public:
-  inline const Type *getElementType() const { return ContainedTys[0]; }
-
-  virtual bool indexValid(const Value *V) const;
-  virtual bool indexValid(unsigned) const {
-    return true;
-  }
-
-  /// getTypeAtIndex - Given an index value into the type, return the type of
-  /// the element.  For sequential types, there is only one subtype...
-  ///
-  virtual const Type *getTypeAtIndex(const Value *) const {
-    return ContainedTys[0];
-  }
-  virtual const Type *getTypeAtIndex(unsigned) const {
-    return ContainedTys[0];
-  }
+  const Type *getElementType() const { return ContainedTys[0]; }
 
   // Methods for support type inquiry through isa, cast, and dyn_cast.
   static inline bool classof(const SequentialType *) { return true; }
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index 442e0c0..0fe8993 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -119,7 +119,7 @@ public:
   /// illegal to call this method if the global is external, because we cannot
   /// tell what the value is initialized to!
   ///
-  inline /*const FIXME*/ Constant *getInitializer() const {
+  inline const Constant *getInitializer() const {
     assert(hasInitializer() && "GV doesn't have initializer!");
     return static_cast<Constant*>(Op<0>().get());
   }
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index ed8f0f7..0d86086 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -25,15 +25,16 @@ class PointerType;
 class FunctionType;
 class Module;
 struct InlineAsmKeyType;
-template<class ValType, class TypeClass, class ConstantClass, bool HasLargeKey>
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
+         bool HasLargeKey>
 class ConstantUniqueMap;
 template<class ConstantClass, class TypeClass, class ValType>
 struct ConstantCreator;
 
 class InlineAsm : public Value {
   friend struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType>;
-  friend class ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm,
-                                 false>;
+  friend class ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&,
+                                 PointerType, InlineAsm, false>;
 
   InlineAsm(const InlineAsm &);             // do not implement
   void operator=(const InlineAsm&);         // do not implement
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 54dfe39..cc38010 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -1814,7 +1814,7 @@ class PHINode : public Instruction {
   explicit PHINode(const Type *Ty, unsigned NumReservedValues,
                    const Twine &NameStr = "", Instruction *InsertBefore = 0)
     : Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore),
-      ReservedSpace(NumReservedValues * 2) {
+      ReservedSpace(NumReservedValues) {
     setName(NameStr);
     OperandList = allocHungoffUses(ReservedSpace);
   }
@@ -1822,11 +1822,16 @@ class PHINode : public Instruction {
   PHINode(const Type *Ty, unsigned NumReservedValues, const Twine &NameStr,
           BasicBlock *InsertAtEnd)
     : Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd),
-      ReservedSpace(NumReservedValues * 2) {
+      ReservedSpace(NumReservedValues) {
     setName(NameStr);
     OperandList = allocHungoffUses(ReservedSpace);
   }
 protected:
+  // allocHungoffUses - this is more complicated than the generic
+  // User::allocHungoffUses, because we have to allocate Uses for the incoming
+  // values and pointers to the incoming blocks, all in one allocation.
+  Use *allocHungoffUses(unsigned) const;
+
   virtual PHINode *clone_impl() const;
 public:
   /// Constructors - NumReservedValues is a hint for the number of incoming
@@ -1845,32 +1850,55 @@ public:
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
+  // Block iterator interface. This provides access to the list of incoming
+  // basic blocks, which parallels the list of incoming values.
+
+  typedef BasicBlock **block_iterator;
+  typedef BasicBlock * const *const_block_iterator;
+
+  block_iterator block_begin() {
+    Use::UserRef *ref =
+      reinterpret_cast<Use::UserRef*>(op_begin() + ReservedSpace);
+    return reinterpret_cast<block_iterator>(ref + 1);
+  }
+
+  const_block_iterator block_begin() const {
+    const Use::UserRef *ref =
+      reinterpret_cast<const Use::UserRef*>(op_begin() + ReservedSpace);
+    return reinterpret_cast<const_block_iterator>(ref + 1);
+  }
+
+  block_iterator block_end() {
+    return block_begin() + getNumOperands();
+  }
+
+  const_block_iterator block_end() const {
+    return block_begin() + getNumOperands();
+  }
+
   /// getNumIncomingValues - Return the number of incoming edges
   ///
-  unsigned getNumIncomingValues() const { return getNumOperands()/2; }
+  unsigned getNumIncomingValues() const { return getNumOperands(); }
 
   /// getIncomingValue - Return incoming value number x
   ///
   Value *getIncomingValue(unsigned i) const {
-    assert(i*2 < getNumOperands() && "Invalid value number!");
-    return getOperand(i*2);
+    return getOperand(i);
   }
   void setIncomingValue(unsigned i, Value *V) {
-    assert(i*2 < getNumOperands() && "Invalid value number!");
-    setOperand(i*2, V);
+    setOperand(i, V);
   }
   static unsigned getOperandNumForIncomingValue(unsigned i) {
-    return i*2;
+    return i;
   }
   static unsigned getIncomingValueNumForOperand(unsigned i) {
-    assert(i % 2 == 0 && "Invalid incoming-value operand index!");
-    return i/2;
+    return i;
   }
 
   /// getIncomingBlock - Return incoming basic block number @p i.
   ///
   BasicBlock *getIncomingBlock(unsigned i) const {
-    return cast<BasicBlock>(getOperand(i*2+1));
+    return block_begin()[i];
   }
 
   /// getIncomingBlock - Return incoming basic block corresponding
@@ -1878,7 +1906,7 @@ public:
   ///
   BasicBlock *getIncomingBlock(const Use &U) const {
     assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
-    return cast<BasicBlock>((&U + 1)->get());
+    return getIncomingBlock(&U - op_begin());
   }
 
   /// getIncomingBlock - Return incoming basic block corresponding
@@ -1889,16 +1917,8 @@ public:
     return getIncomingBlock(I.getUse());
   }
 
-
   void setIncomingBlock(unsigned i, BasicBlock *BB) {
-    setOperand(i*2+1, (Value*)BB);
-  }
-  static unsigned getOperandNumForIncomingBlock(unsigned i) {
-    return i*2+1;
-  }
-  static unsigned getIncomingBlockNumForOperand(unsigned i) {
-    assert(i % 2 == 1 && "Invalid incoming-block operand index!");
-    return i/2;
+    block_begin()[i] = BB;
   }
 
   /// addIncoming - Add an incoming value to the end of the PHI list
@@ -1908,13 +1928,12 @@ public:
     assert(BB && "PHI node got a null basic block!");
     assert(getType() == V->getType() &&
            "All operands to PHI node must be the same type as the PHI node!");
-    unsigned OpNo = NumOperands;
-    if (OpNo+2 > ReservedSpace)
+    if (NumOperands == ReservedSpace)
       growOperands();  // Get more space!
     // Initialize some new operands.
-    NumOperands = OpNo+2;
-    OperandList[OpNo] = V;
-    OperandList[OpNo+1] = (Value*)BB;
+    ++NumOperands;
+    setIncomingValue(NumOperands - 1, V);
+    setIncomingBlock(NumOperands - 1, BB);
   }
 
   /// removeIncomingValue - Remove an incoming value.  This is useful if a
@@ -1937,14 +1956,16 @@ public:
   /// block in the value list for this PHI.  Returns -1 if no instance.
   ///
   int getBasicBlockIndex(const BasicBlock *BB) const {
-    Use *OL = OperandList;
-    for (unsigned i = 0, e = getNumOperands(); i != e; i += 2)
-      if (OL[i+1].get() == (const Value*)BB) return i/2;
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (block_begin()[i] == BB)
+        return i;
     return -1;
   }
 
   Value *getIncomingValueForBlock(const BasicBlock *BB) const {
-    return getIncomingValue(getBasicBlockIndex(BB));
+    int Idx = getBasicBlockIndex(BB);
+    assert(Idx >= 0 && "Invalid basic block argument!");
+    return getIncomingValue(Idx);
   }
 
   /// hasConstantValue - If the specified PHI node always merges together the
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index c1fbce4..1439624 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -312,7 +312,7 @@ let Properties = [IntrNoMem] in {
   def int_eh_sjlj_lsda    : Intrinsic<[llvm_ptr_ty]>;
   def int_eh_sjlj_callsite: Intrinsic<[], [llvm_i32_ty]>;
 }
-def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_i32_ty], [IntrReadMem]>;
+def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_i32_ty]>;
 def int_eh_sjlj_setjmp  : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
 def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
 
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index c05a925..69be46b 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -547,6 +547,9 @@ namespace llvm {
   ///
   /// \param ShowInst - Whether to show the MCInst representation inline with
   /// the assembly.
+  ///
+  /// \param DecodeLSDA - If true, emit comments that translates the LSDA into a
+  /// human readable format. Only usable with CFI.
   MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
                                 bool isVerboseAsm,
                                 bool useLoc,
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index d2ea123..7e193ff 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -33,7 +33,7 @@ class PredIterator : public std::iterator<std::forward_iterator_tag,
   USE_iterator It;
 
   inline void advancePastNonTerminators() {
-    // Loop to ignore non terminator uses (for example PHI nodes).
+    // Loop to ignore non terminator uses (for example BlockAddresses).
     while (!It.atEnd() && !isa<TerminatorInst>(*It))
       ++It;
   }
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 616087c..bc8be89 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -133,11 +133,21 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
   // model instruction operand constraints, and should have isAllocatable = 0.
   bit isAllocatable = 1;
 
-  // MethodProtos/MethodBodies - These members can be used to insert arbitrary
-  // code into a generated register class.   The normal usage of this is to
-  // overload virtual methods.
-  code MethodProtos = [{}];
-  code MethodBodies = [{}];
+  // AltOrders - List of alternative allocation orders. The default order is
+  // MemberList itself, and that is good enough for most targets since the
+  // register allocators automatically remove reserved registers and move
+  // callee-saved registers to the end.
+  list<dag> AltOrders = [];
+
+  // AltOrderSelect - The body of a function that selects the allocation order
+  // to use in a given machine function. The code will be inserted in a
+  // function like this:
+  //
+  //   static inline unsigned f(const MachineFunction &MF) { ... }
+  //
+  // The function should return 0 to select the default order defined by
+  // MemberList, 1 to select the first AltOrders entry and so on.
+  code AltOrderSelect = [{}];
 }
 
 // The memberList in a RegisterClass is a dag of set operations. TableGen
@@ -174,6 +184,43 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
 def sequence;
 def decimate;
 
+// RegisterTuples - Automatically generate super-registers by forming tuples of
+// sub-registers. This is useful for modeling register sequence constraints
+// with pseudo-registers that are larger than the architectural registers.
+//
+// The sub-register lists are zipped together:
+//
+//   def EvenOdd : RegisterTuples<[sube, subo], [(add R0, R2), (add R1, R3)]>;
+//
+// Generates the same registers as:
+//
+//   let SubRegIndices = [sube, subo] in {
+//     def R0_R1 : RegisterWithSubRegs<"", [R0, R1]>;
+//     def R2_R3 : RegisterWithSubRegs<"", [R2, R3]>;
+//   }
+//
+// The generated pseudo-registers inherit super-classes and fields from their
+// first sub-register. Most fields from the Register class are inferred, and
+// the AsmName and Dwarf numbers are cleared.
+//
+// RegisterTuples instances can be used in other set operations to form
+// register classes and so on. This is the only way of using the generated
+// registers.
+class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> {
+  // SubRegs - N lists of registers to be zipped up. Super-registers are
+  // synthesized from the first element of each SubRegs list, the second
+  // element and so on.
+  list<dag> SubRegs = Regs;
+
+  // SubRegIndices - N SubRegIndex instances. This provides the names of the
+  // sub-registers in the synthesized super-registers.
+  list<SubRegIndex> SubRegIndices = Indices;
+
+  // Compose sub-register indices like in a normal Register.
+  list<dag> CompositeIndices = [];
+}
+
+
 //===----------------------------------------------------------------------===//
 // DwarfRegNum - This class provides a mapping of the llvm register enumeration
 // to the register numbering used by gcc and gdb.  These values are used by a
diff --git a/include/llvm/Target/TargetAsmInfo.h b/include/llvm/Target/TargetAsmInfo.h
index 743a2d4..1dc725a 100644
--- a/include/llvm/Target/TargetAsmInfo.h
+++ b/include/llvm/Target/TargetAsmInfo.h
@@ -59,6 +59,10 @@ public:
     return TLOF->getEHFrameSection();
   }
 
+  const MCSection *getCompactUnwindSection() const {
+    return TLOF->getCompactUnwindSection();
+  }
+
   const MCSection *getDwarfFrameSection() const {
     return TLOF->getDwarfFrameSection();
   }
@@ -79,6 +83,10 @@ public:
     return TLOF->isFunctionEHFrameSymbolPrivate();
   }
 
+  bool getSupportsCompactUnwindInfo() const {
+    return TLOF->getSupportsCompactUnwindInfo();
+  }
+
   const unsigned *getCalleeSavedRegs(MachineFunction *MF = 0) const {
     return TRI->getCalleeSavedRegs(MF);
   }
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 093e79b..c3f5f2b 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1836,9 +1836,8 @@ private:
 
         // Build a new vector type and check if it is legal.
         MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
-
         // Found a legal promoted vector type.
-        if (ValueTypeActions.getTypeAction(NVT) == TypeLegal)
+        if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
           return LegalizeKind(TypePromoteInteger,
                               EVT::getVectorVT(Context, EltVT, NumElts));
       }
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 3991035..a3d1f37 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -68,6 +68,11 @@ protected:
   /// LSDASection - If exception handling is supported by the target, this is
   /// the section the Language Specific Data Area information is emitted to.
   const MCSection *LSDASection;
+
+  /// CompactUnwindSection - If exception handling is supported by the target
+  /// and the target can support a compact representation of the CIE and FDE,
+  /// this is the section to emit them into.
+  const MCSection *CompactUnwindSection;
   
   // Dwarf sections for debug info.  If a target supports debug info, these must
   // be set.
@@ -102,8 +107,12 @@ protected:
   /// private linkage, aka an L or .L label) or false if it should be a normal
   /// non-.globl label.  This defaults to true.
   bool IsFunctionEHFrameSymbolPrivate;
+
+  /// SupportsCompactUnwindInfo - This flag is set to true if the CIE and FDE
+  /// information should be emitted in a compact form.
+  bool SupportsCompactUnwindInfo;
+
 public:
-  
   MCContext &getContext() const { return *Ctx; }
   
   virtual ~TargetLoweringObjectFile();
@@ -121,10 +130,12 @@ public:
   bool getSupportsWeakOmittedEHFrame() const {
     return SupportsWeakOmittedEHFrame;
   }
-  
   bool getCommDirectiveSupportsAlignment() const {
     return CommDirectiveSupportsAlignment;
   }
+  bool getSupportsCompactUnwindInfo() const {
+    return SupportsCompactUnwindInfo;
+  }
 
   const MCSection *getTextSection() const { return TextSection; }
   const MCSection *getDataSection() const { return DataSection; }
@@ -132,6 +143,7 @@ public:
   const MCSection *getStaticCtorSection() const { return StaticCtorSection; }
   const MCSection *getStaticDtorSection() const { return StaticDtorSection; }
   const MCSection *getLSDASection() const { return LSDASection; }
+  const MCSection *getCompactUnwindSection() const{return CompactUnwindSection;}
   virtual const MCSection *getEHFrameSection() const = 0;
   virtual void emitPersonalityValue(MCStreamer &Streamer,
                                     const TargetMachine &TM,
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index feb0929..840b048 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -237,29 +237,6 @@ public:
     return SuperClasses[0] != 0;
   }
 
-  /// allocation_order_begin/end - These methods define a range of registers
-  /// which specify the registers in this class that are valid to register
-  /// allocate, and the preferred order to allocate them in.  For example,
-  /// callee saved registers should be at the end of the list, because it is
-  /// cheaper to allocate caller saved registers.
-  ///
-  /// These methods take a MachineFunction argument, which can be used to tune
-  /// the allocatable registers based on the characteristics of the function,
-  /// subtarget, or other criteria.
-  ///
-  /// Register allocators should account for the fact that an allocation
-  /// order iterator may return a reserved register and always check
-  /// if the register is allocatable (getAllocatableSet()) before using it.
-  ///
-  /// By default, these methods return all registers in the class.
-  ///
-  virtual iterator allocation_order_begin(const MachineFunction &MF) const {
-    return begin();
-  }
-  virtual iterator allocation_order_end(const MachineFunction &MF)   const {
-    return end();
-  }
-
   /// getRawAllocationOrder - Returns the preferred order for allocating
   /// registers from this register class in MF. The raw order comes directly
   /// from the .td file and may include reserved registers that are not
@@ -276,9 +253,7 @@ public:
   ///
   virtual
   ArrayRef<unsigned> getRawAllocationOrder(const MachineFunction &MF) const {
-    iterator B = allocation_order_begin(MF);
-    iterator E = allocation_order_end(MF);
-    return ArrayRef<unsigned>(B, E - B);
+    return ArrayRef<unsigned>(begin(), getNumRegs());
   }
 
   /// getSize - Return the size of the register in bytes, which is also the size
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index 0bd4350..6110154 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -17,7 +17,6 @@
 
 #include "llvm/AbstractTypeUser.h"
 #include "llvm/Support/Casting.h"
-#include <string>
 #include <vector>
 
 namespace llvm {
@@ -60,7 +59,7 @@ template<class GraphType> struct GraphTraits;
 /// @brief Root of type hierarchy
 class Type : public AbstractTypeUser {
 public:
-  //===-------------------------------------------------------------------===//
+  //===--------------------------------------------------------------------===//
   /// Definitions of all of the base types for the Type system.  Based on this
   /// value, you can cast to a "DerivedType" subclass (see DerivedTypes.h)
   /// Note: If you add an element to this, you need to add an element to the
@@ -68,19 +67,19 @@ public:
   /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding.
   ///
   enum TypeID {
-    // PrimitiveTypes .. make sure LastPrimitiveTyID stays up to date
+    // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
     VoidTyID = 0,    ///<  0: type with no size
-    FloatTyID,       ///<  1: 32 bit floating point type
-    DoubleTyID,      ///<  2: 64 bit floating point type
-    X86_FP80TyID,    ///<  3: 80 bit floating point type (X87)
-    FP128TyID,       ///<  4: 128 bit floating point type (112-bit mantissa)
-    PPC_FP128TyID,   ///<  5: 128 bit floating point type (two 64-bits, PowerPC)
+    FloatTyID,       ///<  1: 32-bit floating point type
+    DoubleTyID,      ///<  2: 64-bit floating point type
+    X86_FP80TyID,    ///<  3: 80-bit floating point type (X87)
+    FP128TyID,       ///<  4: 128-bit floating point type (112-bit mantissa)
+    PPC_FP128TyID,   ///<  5: 128-bit floating point type (two 64-bits, PowerPC)
     LabelTyID,       ///<  6: Labels
     MetadataTyID,    ///<  7: Metadata
-    X86_MMXTyID,     ///<  8: MMX vectors (64 bits)
+    X86_MMXTyID,     ///<  8: MMX vectors (64 bits, X86 specific)
 
-    // Derived types... see DerivedTypes.h file...
-    // Make sure FirstDerivedTyID stays up to date!!!
+    // Derived types... see DerivedTypes.h file.
+    // Make sure FirstDerivedTyID stays up to date!
     IntegerTyID,     ///<  9: Arbitrary bit width integers
     FunctionTyID,    ///< 10: Functions
     StructTyID,      ///< 11: Structures
@@ -112,9 +111,8 @@ private:
 
   const Type *getForwardedTypeInternal() const;
 
-  // Some Type instances are allocated as arrays, some aren't. So we provide
-  // this method to get the right kind of destruction for the type of Type.
-  void destroy() const; // const is a lie, this does "delete this"!
+  // When the last reference to a forwarded type is removed, it is destroyed.
+  void destroy() const;
 
 protected:
   explicit Type(LLVMContext &C, TypeID id) :
@@ -179,17 +177,13 @@ public:
   LLVMContext &getContext() const { return Context; }
 
   //===--------------------------------------------------------------------===//
-  // Property accessors for dealing with types... Some of these virtual methods
-  // are defined in private classes defined in Type.cpp for primitive types.
+  // Accessors for working with types.
   //
 
-  /// getDescription - Return the string representation of the type.
-  std::string getDescription() const;
-
   /// getTypeID - Return the type id for the type.  This will return one
   /// of the TypeID enum elements defined above.
   ///
-  inline TypeID getTypeID() const { return ID; }
+  TypeID getTypeID() const { return ID; }
 
   /// isVoidTy - Return true if this is 'void'.
   bool isVoidTy() const { return ID == VoidTyID; }
@@ -523,7 +517,7 @@ inline void PATypeHolder::dropRef() {
 
 //===----------------------------------------------------------------------===//
 // Provide specializations of GraphTraits to be able to treat a type as a
-// graph of sub types...
+// graph of sub types.
 
 template <> struct GraphTraits<Type*> {
   typedef Type NodeType;
diff --git a/include/llvm/Use.h b/include/llvm/Use.h
index ccbdd7f..a496325 100644
--- a/include/llvm/Use.h
+++ b/include/llvm/Use.h
@@ -60,6 +60,10 @@ public:
   /// that also works with less standard-compliant compilers
   void swap(Use &RHS);
 
+  // A type for the word following an array of hung-off Uses in memory, which is
+  // a pointer back to their User with the bottom bit set.
+  typedef PointerIntPair<User*, 1, unsigned> UserRef;
+
 private:
   /// Copy ctor - do not implement
   Use(const Use &U);
@@ -108,13 +112,16 @@ public:
   Use *getNext() const { return Next; }
 
   
+  /// initTags - initialize the waymarking tags on an array of Uses, so that
+  /// getUser() can find the User from any of those Uses.
+  static Use *initTags(Use *Start, Use *Stop);
+
   /// zap - This is used to destroy Use operands when the number of operands of
   /// a User changes.
   static void zap(Use *Start, const Use *Stop, bool del = false);
 
 private:
   const Use* getImpliedUser() const;
-  static Use *initTags(Use *Start, Use *Stop);
   
   Value *Val;
   Use *Next;
@@ -136,7 +143,6 @@ private:
   }
 
   friend class Value;
-  friend class User;
 };
 
 // simplify_type - Allow clients to treat uses just like values when using
@@ -208,15 +214,6 @@ public:
   unsigned getOperandNo() const;
 };
 
-//===----------------------------------------------------------------------===//
-//                         AugmentedUse layout struct
-//===----------------------------------------------------------------------===//
-
-struct AugmentedUse : public Use {
-  PointerIntPair<User*, 1, unsigned> ref;
-  AugmentedUse(); // not implemented
-};
-
 } // End llvm namespace
 
 #endif
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 08a6065..b5fafd6 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1399,7 +1399,7 @@ llvm::ConstantFoldCall(Function *F,
             ConstantInt::get(F->getContext(), Res),
             ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
           };
-          return ConstantStruct::get(F->getContext(), Ops, 2, false);
+          return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
         }
         }
       }
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 67f8147..b42e946 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -727,37 +727,37 @@ void DIVariable::dump() const {
 
 /// fixupObjcLikeName - Replace contains special characters used
 /// in a typical Objective-C names with '.' in a given string.
-static void fixupObjcLikeName(std::string &Str) {
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+  bool isObjCLike = false;
   for (size_t i = 0, e = Str.size(); i < e; ++i) {
     char C = Str[i];
-    if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
-        C == '(' || C == ')')
-      Str[i] = '.';
+    if (C == '[')
+      isObjCLike = true;
+
+    if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+                       C == '+' || C == '(' || C == ')'))
+      Out.push_back('.');
+    else
+      Out.push_back(C);
   }
 }
 
 /// getFnSpecificMDNode - Return a NameMDNode, if available, that is 
 /// suitable to hold function specific information.
 NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
-  if (FuncName.find('[') == StringRef::npos)
-    return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
-  std::string Name = FuncName;
-  fixupObjcLikeName(Name);
-  return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+  SmallString<32> Name = StringRef("llvm.dbg.lv.");
+  fixupObjcLikeName(FuncName, Name);
+
+  return M.getNamedMetadata(Name.str());
 }
 
 /// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
 /// to hold function specific information.
 NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
-  SmallString<32> Out;
-  if (FuncName.find('[') == StringRef::npos)
-    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
-                                      .toStringRef(Out));
-  
-  std::string Name = FuncName;
-  fixupObjcLikeName(Name);
-  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
-                                    .toStringRef(Out));
+  SmallString<32> Name = StringRef("llvm.dbg.lv.");
+  fixupObjcLikeName(FuncName, Name);
+
+  return M.getOrInsertNamedMetadata(Name.str());
 }
 
 
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index a0c42f0..ba4419c 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -89,7 +89,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
 /// return true.  Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   if (!SE->isSCEVable(I->getType()))
     return false;   // Void and FP expressions cannot be reduced.
 
@@ -136,13 +136,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
     bool AddUserToIVUsers = false;
     if (LI->getLoopFor(User->getParent()) != L) {
       if (isa<PHINode>(User) || Processed.count(User) ||
-          !AddUsersIfInteresting(User, Phi)) {
+          !AddUsersIfInteresting(User)) {
         DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
                      << "   OF SCEV: " << *ISE << '\n');
         AddUserToIVUsers = true;
       }
-    } else if (Processed.count(User) ||
-               !AddUsersIfInteresting(User, Phi)) {
+    } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
       DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
                    << "   OF SCEV: " << *ISE << '\n');
       AddUserToIVUsers = true;
@@ -150,7 +149,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
 
     if (AddUserToIVUsers) {
       // Okay, we found a user that we cannot reduce.
-      IVUses.push_back(new IVStrideUse(this, User, I, Phi));
+      IVUses.push_back(new IVStrideUse(this, User, I));
       IVStrideUse &NewUse = IVUses.back();
       // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
       // The regular return value here is discarded; instead of recording
@@ -165,8 +164,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
   return true;
 }
 
-IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand, PHINode *Phi) {
-  IVUses.push_back(new IVStrideUse(this, User, Operand, Phi));
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+  IVUses.push_back(new IVStrideUse(this, User, Operand));
   return IVUses.back();
 }
 
@@ -194,7 +193,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   // them by stride.  Start by finding all of the PHI nodes in the header for
   // this loop.  If they are induction variables, inspect their uses.
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
-    (void)AddUsersIfInteresting(I, cast<PHINode>(I));
+    (void)AddUsersIfInteresting(I);
 
   return false;
 }
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 769c68c..53d4304 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -50,13 +50,8 @@ static bool isMallocCall(const CallInst *CI) {
   const FunctionType *FTy = Callee->getFunctionType();
   if (FTy->getNumParams() != 1)
     return false;
-  if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
-    if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
-      return false;
-    return true;
-  }
-
-  return false;
+  return FTy->getParamType(0)->isIntegerTy(32) ||
+         FTy->getParamType(0)->isIntegerTy(64);
 }
 
 /// extractMallocCall - Returns the corresponding CallInst if the instruction
@@ -211,7 +206,7 @@ const CallInst *llvm::isFreeCall(const Value *I) {
     return 0;
   if (FTy->getNumParams() != 1)
     return 0;
-  if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
+  if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
     return 0;
 
   return CI;
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 8e5a400..0549935 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -159,7 +159,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   }
 
   // If we haven't found this binop, insert it.
-  Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
+  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"));
+  BO->setDebugLoc(SaveInsertPt->getDebugLoc());
   rememberInstruction(BO);
 
   // Restore the original insert point.
@@ -1155,6 +1156,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
         Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
                                                      "indvar.next",
                                                      HP->getTerminator());
+        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
         rememberInstruction(Add);
         CanonicalIV->addIncoming(Add, HP);
       } else {
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 85defca..3c63106 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -471,7 +471,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
     return lltok::kw_##STR;
 
-  KEYWORD(begin);   KEYWORD(end);
   KEYWORD(true);    KEYWORD(false);
   KEYWORD(declare); KEYWORD(define);
   KEYWORD(global);  KEYWORD(constant);
@@ -636,7 +635,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   INSTKEYWORD(extractelement, ExtractElement);
   INSTKEYWORD(insertelement,  InsertElement);
   INSTKEYWORD(shufflevector,  ShuffleVector);
-  INSTKEYWORD(getresult,      ExtractValue);
   INSTKEYWORD(extractvalue,   ExtractValue);
   INSTKEYWORD(insertvalue,    InsertValue);
 #undef INSTKEYWORD
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index fa1d97d..d985851 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -26,6 +26,13 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+static std::string getTypeString(const Type *T) {
+  std::string Result;
+  raw_string_ostream Tmp(Result);
+  Tmp << *T;
+  return Tmp.str();
+}
+
 /// Run: module ::= toplevelentity*
 bool LLParser::Run() {
   // Prime the lexer.
@@ -163,9 +170,7 @@ bool LLParser::ParseTopLevelEntities() {
     case lltok::kw_module:  if (ParseModuleAsm()) return true; break;
     case lltok::kw_target:  if (ParseTargetDefinition()) return true; break;
     case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
-    case lltok::kw_type:    if (ParseUnnamedType()) return true; break;
     case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
-    case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0
     case lltok::LocalVar:   if (ParseNamedType()) return true; break;
     case lltok::GlobalID:   if (ParseUnnamedGlobal()) return true; break;
     case lltok::GlobalVar:  if (ParseNamedGlobal()) return true; break;
@@ -285,24 +290,18 @@ bool LLParser::ParseDepLibs() {
 }
 
 /// ParseUnnamedType:
-///   ::= 'type' type
 ///   ::= LocalVarID '=' 'type' type
 bool LLParser::ParseUnnamedType() {
+  LocTy TypeLoc = Lex.getLoc();
   unsigned TypeID = NumberedTypes.size();
+  if (Lex.getUIntVal() != TypeID)
+    return Error(Lex.getLoc(), "type expected to be numbered '%" +
+                 Twine(TypeID) + "'");
+  Lex.Lex(); // eat LocalVarID;
 
-  // Handle the LocalVarID form.
-  if (Lex.getKind() == lltok::LocalVarID) {
-    if (Lex.getUIntVal() != TypeID)
-      return Error(Lex.getLoc(), "type expected to be numbered '%" +
-                   Twine(TypeID) + "'");
-    Lex.Lex(); // eat LocalVarID;
-
-    if (ParseToken(lltok::equal, "expected '=' after name"))
-      return true;
-  }
-
-  LocTy TypeLoc = Lex.getLoc();
-  if (ParseToken(lltok::kw_type, "expected 'type' after '='")) return true;
+  if (ParseToken(lltok::equal, "expected '=' after name") ||
+      ParseToken(lltok::kw_type, "expected 'type' after '='"))
+    return true;
 
   PATypeHolder Ty(Type::getVoidTy(Context));
   if (ParseType(Ty)) return true;
@@ -353,20 +352,15 @@ bool LLParser::ParseNamedType() {
     cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
     Ty = FI->second.first.get();
     ForwardRefTypes.erase(FI);
+    return false;
   }
 
   // Inserting a name that is already defined, get the existing name.
-  const Type *Existing = M->getTypeByName(Name);
-  assert(Existing && "Conflict but no matching type?!");
+  assert(M->getTypeByName(Name) && "Conflict but no matching type?!");
 
-  // Otherwise, this is an attempt to redefine a type. That's okay if
-  // the redefinition is identical to the original.
-  // FIXME: REMOVE REDEFINITIONS IN LLVM 3.0
-  if (Existing == Ty) return false;
-
-  // Any other kind of (non-equivalent) redefinition is an error.
+  // Otherwise, this is an attempt to redefine a type, report the error.
   return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" +
-               Ty->getDescription() + "'");
+               getTypeString(Ty) + "'");
 }
 
 
@@ -792,7 +786,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
   if (Val) {
     if (Val->getType() == Ty) return Val;
     Error(Loc, "'@" + Name + "' defined with type '" +
-          Val->getType()->getDescription() + "'");
+          getTypeString(Val->getType()) + "'");
     return 0;
   }
 
@@ -837,7 +831,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
   if (Val) {
     if (Val->getType() == Ty) return Val;
     Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
-          Val->getType()->getDescription() + "'");
+          getTypeString(Val->getType()) + "'");
     return 0;
   }
 
@@ -1263,7 +1257,7 @@ PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
 
   PATypeHolder Ty(ty);
 #if 0
-  dbgs() << "Type '" << Ty->getDescription()
+  dbgs() << "Type '" << *Ty
          << "' newly formed.  Resolving upreferences.\n"
          << UpRefs.size() << " upreferences active!\n";
 #endif
@@ -1281,8 +1275,8 @@ PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
                 UpRefs[i].LastContainedTy) != Ty->subtype_end();
 
 #if 0
-    dbgs() << "  UR#" << i << " - TypeContains(" << Ty->getDescription() << ", "
-           << UpRefs[i].LastContainedTy->getDescription() << ") = "
+    dbgs() << "  UR#" << i << " - TypeContains(" << *Ty << ", "
+           << *UpRefs[i].LastContainedTy << ") = "
            << (ContainsType ? "true" : "false")
            << " level=" << UpRefs[i].NestingLevel << "\n";
 #endif
@@ -1353,7 +1347,6 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
       return true;
     break;
   case lltok::LocalVar:
-  case lltok::StringConstant:  // FIXME: REMOVE IN LLVM 3.0
     // TypeRec ::= %foo
     if (const Type *T = M->getTypeByName(Lex.getStrVal())) {
       Result = T;
@@ -1513,8 +1506,7 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
     if (ArgTy->isVoidTy())
       return Error(TypeLoc, "argument can not have void type");
 
-    if (Lex.getKind() == lltok::LocalVar ||
-        Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+    if (Lex.getKind() == lltok::LocalVar) {
       Name = Lex.getStrVal();
       Lex.Lex();
     }
@@ -1539,8 +1531,7 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
       if (ArgTy->isVoidTy())
         return Error(TypeLoc, "argument can not have void type");
 
-      if (Lex.getKind() == lltok::LocalVar ||
-          Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+      if (Lex.getKind() == lltok::LocalVar) {
         Name = Lex.getStrVal();
         Lex.Lex();
       } else {
@@ -1567,22 +1558,16 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) {
 
   std::vector<ArgInfo> ArgList;
   bool isVarArg;
-  unsigned Attrs;
-  if (ParseArgumentList(ArgList, isVarArg, true) ||
-      // FIXME: Allow, but ignore attributes on function types!
-      // FIXME: Remove in LLVM 3.0
-      ParseOptionalAttrs(Attrs, 2))
+  if (ParseArgumentList(ArgList, isVarArg, true))
     return true;
 
   // Reject names on the arguments lists.
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
     if (!ArgList[i].Name.empty())
       return Error(ArgList[i].Loc, "argument name invalid in function type");
-    if (!ArgList[i].Attrs != 0) {
-      // Allow but ignore attributes on function types; this permits
-      // auto-upgrade.
-      // FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0
-    }
+    if (ArgList[i].Attrs != 0)
+      return Error(ArgList[i].Loc,
+                   "argument attributes invalid in function type");
   }
 
   std::vector<const Type*> ArgListTy;
@@ -1780,7 +1765,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
       P.Error(Loc, "'%" + Name + "' is not a basic block");
     else
       P.Error(Loc, "'%" + Name + "' defined with type '" +
-              Val->getType()->getDescription() + "'");
+              getTypeString(Val->getType()) + "'");
     return 0;
   }
 
@@ -1822,7 +1807,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
       P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
     else
       P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
-              Val->getType()->getDescription() + "'");
+              getTypeString(Val->getType()) + "'");
     return 0;
   }
 
@@ -1870,7 +1855,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
     if (FI != ForwardRefValIDs.end()) {
       if (FI->second.first->getType() != Inst->getType())
         return P.Error(NameLoc, "instruction forward referenced with type '" +
-                       FI->second.first->getType()->getDescription() + "'");
+                       getTypeString(FI->second.first->getType()) + "'");
       FI->second.first->replaceAllUsesWith(Inst);
       delete FI->second.first;
       ForwardRefValIDs.erase(FI);
@@ -1886,7 +1871,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
   if (FI != ForwardRefVals.end()) {
     if (FI->second.first->getType() != Inst->getType())
       return P.Error(NameLoc, "instruction forward referenced with type '" +
-                     FI->second.first->getType()->getDescription() + "'");
+                     getTypeString(FI->second.first->getType()) + "'");
     FI->second.first->replaceAllUsesWith(Inst);
     delete FI->second.first;
     ForwardRefVals.erase(FI);
@@ -1969,7 +1954,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     ID.Kind = ValID::t_LocalID;
     break;
   case lltok::LocalVar:  // %foo
-  case lltok::StringConstant:  // "foo" - FIXME: REMOVE IN LLVM 3.0
     ID.StrVal = Lex.getStrVal();
     ID.Kind = ValID::t_LocalName;
     break;
@@ -2003,8 +1987,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
         ParseToken(lltok::rbrace, "expected end of struct constant"))
       return true;
 
-    ID.ConstantVal = ConstantStruct::get(Context, Elts.data(),
-                                         Elts.size(), false);
+    // FIXME: Get this type from context instead of reconstructing it!
+    ID.ConstantVal = ConstantStruct::getAnon(Context, Elts);
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -2023,8 +2007,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       return true;
 
     if (isPackedStruct) {
-      ID.ConstantVal =
-        ConstantStruct::get(Context, Elts.data(), Elts.size(), true);
+      // FIXME: Get this type from context instead of reconstructing it!
+      ID.ConstantVal = ConstantStruct::getAnon(Context, Elts, true);
       ID.Kind = ValID::t_Constant;
       return false;
     }
@@ -2042,7 +2026,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       if (Elts[i]->getType() != Elts[0]->getType())
         return Error(FirstEltLoc,
                      "vector element #" + Twine(i) +
-                    " is not of type '" + Elts[0]->getType()->getDescription());
+                    " is not of type '" + getTypeString(Elts[0]->getType()));
 
     ID.ConstantVal = ConstantVector::get(Elts);
     ID.Kind = ValID::t_Constant;
@@ -2066,7 +2050,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
 
     if (!Elts[0]->getType()->isFirstClassType())
       return Error(FirstEltLoc, "invalid array element type: " +
-                   Elts[0]->getType()->getDescription());
+                   getTypeString(Elts[0]->getType()));
 
     ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
 
@@ -2075,10 +2059,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       if (Elts[i]->getType() != Elts[0]->getType())
         return Error(FirstEltLoc,
                      "array element #" + Twine(i) +
-                     " is not of type '" +Elts[0]->getType()->getDescription());
+                     " is not of type '" + getTypeString(Elts[0]->getType()));
     }
 
-    ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size());
+    ID.ConstantVal = ConstantArray::get(ATy, Elts);
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -2158,8 +2142,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       return true;
     if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
       return Error(ID.Loc, "invalid cast opcode for cast from '" +
-                   SrcVal->getType()->getDescription() + "' to '" +
-                   DestTy->getDescription() + "'");
+                   getTypeString(SrcVal->getType()) + "' to '" +
+                   getTypeString(DestTy) + "'");
     ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
                                                  SrcVal, DestTy);
     ID.Kind = ValID::t_Constant;
@@ -2568,7 +2552,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
 
     if (V->getType() != Ty)
       return Error(ID.Loc, "floating point constant does not have type '" +
-                   Ty->getDescription() + "'");
+                   getTypeString(Ty) + "'");
 
     return false;
   case ValID::t_Null:
@@ -2766,21 +2750,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
       
       ForwardRefVals.erase(FRVI);
     } else if ((Fn = M->getFunction(FunctionName))) {
-      // If this function already exists in the symbol table, then it is
-      // multiply defined.  We accept a few cases for old backwards compat.
-      // FIXME: Remove this stuff for LLVM 3.0.
-      if (Fn->getType() != PFT || Fn->getAttributes() != PAL ||
-          (!Fn->isDeclaration() && isDefine)) {
-        // If the redefinition has different type or different attributes,
-        // reject it.  If both have bodies, reject it.
-        return Error(NameLoc, "invalid redefinition of function '" +
-                     FunctionName + "'");
-      } else if (Fn->isDeclaration()) {
-        // Make sure to strip off any argument names so we can't get conflicts.
-        for (Function::arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
-             AI != AE; ++AI)
-          AI->setName("");
-      }
+      // Reject redefinitions.
+      return Error(NameLoc, "invalid redefinition of function '" +
+                   FunctionName + "'");
     } else if (M->getNamedValue(FunctionName)) {
       return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
     }
@@ -2819,10 +2791,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   // Add all of the arguments we parsed to the function.
   Function::arg_iterator ArgIt = Fn->arg_begin();
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
-    // If we run out of arguments in the Function prototype, exit early.
-    // FIXME: REMOVE THIS IN LLVM 3.0, this is just for the mismatch case above.
-    if (ArgIt == Fn->arg_end()) break;
-    
     // If the argument has a name, insert it into the argument symbol table.
     if (ArgList[i].Name.empty()) continue;
 
@@ -2840,10 +2808,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
 
 /// ParseFunctionBody
 ///   ::= '{' BasicBlock+ '}'
-///   ::= 'begin' BasicBlock+ 'end'  // FIXME: remove in LLVM 3.0
 ///
 bool LLParser::ParseFunctionBody(Function &Fn) {
-  if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin)
+  if (Lex.getKind() != lltok::lbrace)
     return TokError("expected '{' in function body");
   Lex.Lex();  // eat the {.
 
@@ -2853,10 +2820,10 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
   PerFunctionState PFS(*this, Fn, FunctionNumber);
 
   // We need at least one basic block.
-  if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_end)
+  if (Lex.getKind() == lltok::rbrace)
     return TokError("function body requires at least one basic block");
   
-  while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
+  while (Lex.getKind() != lltok::rbrace)
     if (ParseBasicBlock(PFS)) return true;
 
   // Eat the }.
@@ -2897,9 +2864,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
       Lex.Lex();
       if (ParseToken(lltok::equal, "expected '=' after instruction id"))
         return true;
-    } else if (Lex.getKind() == lltok::LocalVar ||
-               // FIXME: REMOVE IN LLVM 3.0
-               Lex.getKind() == lltok::StringConstant) {
+    } else if (Lex.getKind() == lltok::LocalVar) {
       NameStr = Lex.getStrVal();
       Lex.Lex();
       if (ParseToken(lltok::equal, "expected '=' after instruction name"))
@@ -3032,7 +2997,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
       return ParseStore(Inst, PFS, true);
     else
       return TokError("expected 'load' or 'store'");
-  case lltok::kw_getresult:     return ParseGetResult(Inst, PFS);
   case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
   case lltok::kw_extractvalue:  return ParseExtractValue(Inst, PFS);
   case lltok::kw_insertvalue:   return ParseInsertValue(Inst, PFS);
@@ -3087,9 +3051,7 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
 /// ParseRet - Parse a return instruction.
 ///   ::= 'ret' void (',' !dbg, !1)*
 ///   ::= 'ret' TypeAndValue (',' !dbg, !1)*
-///   ::= 'ret' TypeAndValue (',' TypeAndValue)+  (',' !dbg, !1)*
-///         [[obsolete: LLVM 3.0]]
-int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
                        PerFunctionState &PFS) {
   PATypeHolder Ty(Type::getVoidTy(Context));
   if (ParseType(Ty, true /*void allowed*/)) return true;
@@ -3102,38 +3064,8 @@ int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
   Value *RV;
   if (ParseValue(Ty, RV, PFS)) return true;
 
-  bool ExtraComma = false;
-  if (EatIfPresent(lltok::comma)) {
-    // Parse optional custom metadata, e.g. !dbg
-    if (Lex.getKind() == lltok::MetadataVar) {
-      ExtraComma = true;
-    } else {
-      // The normal case is one return value.
-      // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring
-      // use of 'ret {i32,i32} {i32 1, i32 2}'
-      SmallVector<Value*, 8> RVs;
-      RVs.push_back(RV);
-
-      do {
-        // If optional custom metadata, e.g. !dbg is seen then this is the 
-        // end of MRV.
-        if (Lex.getKind() == lltok::MetadataVar)
-          break;
-        if (ParseTypeAndValue(RV, PFS)) return true;
-        RVs.push_back(RV);
-      } while (EatIfPresent(lltok::comma));
-
-      RV = UndefValue::get(PFS.getFunction().getReturnType());
-      for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
-        Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
-        BB->getInstList().push_back(I);
-        RV = I;
-      }
-    }
-  }
-
   Inst = ReturnInst::Create(Context, RV);
-  return ExtraComma ? InstExtraComma : InstNormal;
+  return false;
 }
 
 
@@ -3321,7 +3253,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
 
     if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
       return Error(ArgList[i].Loc, "argument is not of expected type '" +
-                   ExpectedTy->getDescription() + "'");
+                   getTypeString(ExpectedTy) + "'");
     Args.push_back(ArgList[i].V);
     if (ArgList[i].Attrs != Attribute::None)
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
@@ -3447,8 +3379,8 @@ bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
   if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
     CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy);
     return Error(Loc, "invalid cast opcode for cast from '" +
-                 Op->getType()->getDescription() + "' to '" +
-                 DestTy->getDescription() + "'");
+                 getTypeString(Op->getType()) + "' to '" +
+                 getTypeString(DestTy) + "'");
   }
   Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
   return false;
@@ -3658,7 +3590,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
 
     if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
       return Error(ArgList[i].Loc, "argument is not of expected type '" +
-                   ExpectedTy->getDescription() + "'");
+                   getTypeString(ExpectedTy) + "'");
     Args.push_back(ArgList[i].V);
     if (ArgList[i].Attrs != Attribute::None)
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
@@ -3757,25 +3689,6 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
   return AteExtraComma ? InstExtraComma : InstNormal;
 }
 
-/// ParseGetResult
-///   ::= 'getresult' TypeAndValue ',' i32
-/// FIXME: Remove support for getresult in LLVM 3.0
-bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
-  Value *Val; LocTy ValLoc, EltLoc;
-  unsigned Element;
-  if (ParseTypeAndValue(Val, ValLoc, PFS) ||
-      ParseToken(lltok::comma, "expected ',' after getresult operand") ||
-      ParseUInt32(Element, EltLoc))
-    return true;
-
-  if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy())
-    return Error(ValLoc, "getresult inst requires an aggregate operand");
-  if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
-    return Error(EltLoc, "invalid getresult index for value");
-  Inst = ExtractValueInst::Create(Val, Element);
-  return false;
-}
-
 /// ParseGetElementPtr
 ///   ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
 int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index bbc641c..c486799 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -340,7 +340,7 @@ namespace llvm {
                          PerFunctionState &PFS);
     bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
 
-    int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
+    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
     bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
@@ -361,7 +361,6 @@ namespace llvm {
     int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
     int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
     int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
-    bool ParseGetResult(Instruction *&I, PerFunctionState &PFS);
     int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
     int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
     int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index ea01264..a5f89fc 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -32,7 +32,6 @@ namespace lltok {
     exclaim,           // !
 
     kw_x,
-    kw_begin,   kw_end,
     kw_true,    kw_false,
     kw_declare, kw_define,
     kw_global,  kw_constant,
@@ -124,7 +123,7 @@ namespace lltok {
 
     kw_alloca, kw_load, kw_store, kw_getelementptr,
 
-    kw_extractelement, kw_insertelement, kw_shufflevector, kw_getresult,
+    kw_extractelement, kw_insertelement, kw_shufflevector,
     kw_extractvalue, kw_insertvalue, kw_blockaddress,
 
     // Unsigned Valued tokens (UIntVal).
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index bc995ae..963791f 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -292,11 +292,9 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
       // Make the new constant.
       Constant *NewC;
       if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
-        NewC = ConstantArray::get(UserCA->getType(), &NewOps[0],
-                                        NewOps.size());
+        NewC = ConstantArray::get(UserCA->getType(), NewOps);
       } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
-        NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
-                                         UserCS->getType()->isPacked());
+        NewC = ConstantStruct::get(UserCS->getType(), NewOps);
       } else if (isa<ConstantVector>(UserC)) {
         NewC = ConstantVector::get(NewOps);
       } else {
@@ -790,13 +788,9 @@ bool BitcodeReader::ParseMetadata() {
       Record.clear();
       Code = Stream.ReadCode();
 
-      // METADATA_NAME is always followed by METADATA_NAMED_NODE2.
-      // Or METADATA_NAMED_NODE in LLVM 2.7. FIXME: Remove this in LLVM 3.0.
+      // METADATA_NAME is always followed by METADATA_NAMED_NODE.
       unsigned NextBitCode = Stream.ReadRecord(Code, Record);
-      if (NextBitCode == bitc::METADATA_NAMED_NODE) {
-        LLVM2_7MetadataDetected = true;
-      } else if (NextBitCode != bitc::METADATA_NAMED_NODE2)
-        assert ( 0 && "Invalid Named Metadata record");
+      assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode;
 
       // Read named metadata elements.
       unsigned Size = Record.size();
@@ -807,35 +801,20 @@ bool BitcodeReader::ParseMetadata() {
           return Error("Malformed metadata record");
         NMD->addOperand(MD);
       }
-      // Backwards compatibility hack: NamedMDValues used to be Values,
-      // and they got their own slots in the value numbering. They are no
-      // longer Values, however we still need to account for them in the
-      // numbering in order to be able to read old bitcode files.
-      // FIXME: Remove this in LLVM 3.0.
-      if (LLVM2_7MetadataDetected)
-        MDValueList.AssignValue(0, NextMDValueNo++);
       break;
     }
-    case bitc::METADATA_FN_NODE: // FIXME: Remove in LLVM 3.0.
-    case bitc::METADATA_FN_NODE2:
+    case bitc::METADATA_FN_NODE:
       IsFunctionLocal = true;
       // fall-through
-    case bitc::METADATA_NODE:    // FIXME: Remove in LLVM 3.0.
-    case bitc::METADATA_NODE2: {
-
-      // Detect 2.7-era metadata.
-      // FIXME: Remove in LLVM 3.0.
-      if (Code == bitc::METADATA_FN_NODE || Code == bitc::METADATA_NODE)
-        LLVM2_7MetadataDetected = true;
-
+    case bitc::METADATA_NODE: {
       if (Record.size() % 2 == 1)
-        return Error("Invalid METADATA_NODE2 record");
+        return Error("Invalid METADATA_NODE record");
 
       unsigned Size = Record.size();
       SmallVector<Value*, 8> Elts;
       for (unsigned i = 0; i != Size; i += 2) {
         const Type *Ty = getTypeByID(Record[i]);
-        if (!Ty) return Error("Invalid METADATA_NODE2 record");
+        if (!Ty) return Error("Invalid METADATA_NODE record");
         if (Ty->isMetadataTy())
           Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
         else if (!Ty->isVoidTy())
@@ -1755,10 +1734,7 @@ bool BitcodeReader::ParseMetadataAttachment() {
     switch (Stream.ReadRecord(Code, Record)) {
     default:  // Default behavior: ignore.
       break;
-    // FIXME: Remove in LLVM 3.0.
-    case bitc::METADATA_ATTACHMENT:
-      LLVM2_7MetadataDetected = true;
-    case bitc::METADATA_ATTACHMENT2: {
+    case bitc::METADATA_ATTACHMENT: {
       unsigned RecordLength = Record.size();
       if (Record.empty() || (RecordLength - 1) % 2 == 1)
         return Error ("Invalid METADATA_ATTACHMENT reader!");
@@ -1870,10 +1846,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       I = 0;
       continue;
         
-    // FIXME: Remove this in LLVM 3.0.
-    case bitc::FUNC_CODE_DEBUG_LOC:
-      LLVM2_7MetadataDetected = true;
-    case bitc::FUNC_CODE_DEBUG_LOC2: {      // DEBUG_LOC: [line, col, scope, ia]
+    case bitc::FUNC_CODE_DEBUG_LOC: {      // DEBUG_LOC: [line, col, scope, ia]
       I = 0;     // Get the last instruction emitted.
       if (CurBB && !CurBB->empty())
         I = &CurBB->back();
@@ -2112,18 +2085,6 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       break;
     }
 
-    case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n]
-      if (Record.size() != 2)
-        return Error("Invalid GETRESULT record");
-      unsigned OpNum = 0;
-      Value *Op;
-      getValueTypePair(Record, OpNum, NextValueNo, Op);
-      unsigned Index = Record[1];
-      I = ExtractValueInst::Create(Op, Index);
-      InstructionList.push_back(I);
-      break;
-    }
-
     case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>]
       {
         unsigned Size = Record.size();
@@ -2134,33 +2095,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
         }
 
         unsigned OpNum = 0;
-        SmallVector<Value *,4> Vs;
-        do {
-          Value *Op = NULL;
-          if (getValueTypePair(Record, OpNum, NextValueNo, Op))
-            return Error("Invalid RET record");
-          Vs.push_back(Op);
-        } while(OpNum != Record.size());
-
-        const Type *ReturnType = F->getReturnType();
-        // Handle multiple return values. FIXME: Remove in LLVM 3.0.
-        if (Vs.size() > 1 ||
-            (ReturnType->isStructTy() &&
-             (Vs.empty() || Vs[0]->getType() != ReturnType))) {
-          Value *RV = UndefValue::get(ReturnType);
-          for (unsigned i = 0, e = Vs.size(); i != e; ++i) {
-            I = InsertValueInst::Create(RV, Vs[i], i, "mrv");
-            InstructionList.push_back(I);
-            CurBB->getInstList().push_back(I);
-            ValueList.AssignValue(I, NextValueNo++);
-            RV = I;
-          }
-          I = ReturnInst::Create(Context, RV);
-          InstructionList.push_back(I);
-          break;
-        }
+        Value *Op = NULL;
+        if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+          return Error("Invalid RET record");
+        if (OpNum != Record.size())
+          return Error("Invalid RET record");
 
-        I = ReturnInst::Create(Context, Vs[0]);
+        I = ReturnInst::Create(Context, Op);
         InstructionList.push_back(I);
         break;
       }
@@ -2307,47 +2248,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       break;
     }
 
-    case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align]
-      // Autoupgrade malloc instruction to malloc call.
-      // FIXME: Remove in LLVM 3.0.
-      if (Record.size() < 3)
-        return Error("Invalid MALLOC record");
-      const PointerType *Ty =
-        dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
-      Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context));
-      if (!Ty || !Size) return Error("Invalid MALLOC record");
-      if (!CurBB) return Error("Invalid malloc instruction with no BB");
-      const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext());
-      Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType());
-      AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty);
-      I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(),
-                                 AllocSize, Size, NULL);
-      InstructionList.push_back(I);
-      break;
-    }
-    case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty]
-      unsigned OpNum = 0;
-      Value *Op;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
-          OpNum != Record.size())
-        return Error("Invalid FREE record");
-      if (!CurBB) return Error("Invalid free instruction with no BB");
-      I = CallInst::CreateFree(Op, CurBB);
-      InstructionList.push_back(I);
-      break;
-    }
     case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
-      // For backward compatibility, tolerate a lack of an opty, and use i32.
-      // Remove this in LLVM 3.0.
-      if (Record.size() < 3 || Record.size() > 4)
+      if (Record.size() != 4)
         return Error("Invalid ALLOCA record");
-      unsigned OpNum = 0;
       const PointerType *Ty =
-        dyn_cast_or_null<PointerType>(getTypeByID(Record[OpNum++]));
-      const Type *OpTy = Record.size() == 4 ? getTypeByID(Record[OpNum++]) :
-                                              Type::getInt32Ty(Context);
-      Value *Size = getFnValueByID(Record[OpNum++], OpTy);
-      unsigned Align = Record[OpNum++];
+        dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+      const Type *OpTy = getTypeByID(Record[1]);
+      Value *Size = getFnValueByID(Record[2], OpTy);
+      unsigned Align = Record[3];
       if (!Ty || !Size) return Error("Invalid ALLOCA record");
       I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
       InstructionList.push_back(I);
@@ -2364,7 +2272,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       InstructionList.push_back(I);
       break;
     }
-    case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol]
+    case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol]
       unsigned OpNum = 0;
       Value *Val, *Ptr;
       if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
@@ -2377,24 +2285,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       InstructionList.push_back(I);
       break;
     }
-    case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol]
-      // FIXME: Legacy form of store instruction. Should be removed in LLVM 3.0.
-      unsigned OpNum = 0;
-      Value *Val, *Ptr;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Val) ||
-          getValue(Record, OpNum,
-                   PointerType::getUnqual(Val->getType()), Ptr)||
-          OpNum+2 != Record.size())
-        return Error("Invalid STORE record");
-
-      I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
-      InstructionList.push_back(I);
-      break;
-    }
-    // FIXME: Remove this in LLVM 3.0.
-    case bitc::FUNC_CODE_INST_CALL:
-      LLVM2_7MetadataDetected = true;
-    case bitc::FUNC_CODE_INST_CALL2: {
+    case bitc::FUNC_CODE_INST_CALL: {
       // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
       if (Record.size() < 3)
         return Error("Invalid CALL record");
@@ -2513,23 +2404,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
     BlockAddrFwdRefs.erase(BAFRI);
   }
   
-  // FIXME: Remove this in LLVM 3.0.
-  unsigned NewMDValueListSize = MDValueList.size();
-
   // Trim the value list down to the size it was before we parsed this function.
   ValueList.shrinkTo(ModuleValueListSize);
   MDValueList.shrinkTo(ModuleMDValueListSize);
-
-  // Backwards compatibility hack: Function-local metadata numbers
-  // were previously not reset between functions. This is now fixed,
-  // however we still need to understand the old numbering in order
-  // to be able to read old bitcode files.
-  // FIXME: Remove this in LLVM 3.0.
-  if (LLVM2_7MetadataDetected)
-    MDValueList.resize(NewMDValueListSize);
-
   std::vector<BasicBlock*>().swap(FunctionBBs);
-
   return false;
 }
 
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index f8fc079..9bab00e 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -174,17 +174,10 @@ class BitcodeReader : public GVMaterializer {
   typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
   DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
 
-  /// LLVM2_7MetadataDetected - True if metadata produced by LLVM 2.7 or
-  /// earlier was detected, in which case we behave slightly differently,
-  /// for compatibility.
-  /// FIXME: Remove in LLVM 3.0.
-  bool LLVM2_7MetadataDetected;
-  
 public:
   explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
     : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
-      ErrorString(0), ValueList(C), MDValueList(C),
-      LLVM2_7MetadataDetected(false) {
+      ErrorString(0), ValueList(C), MDValueList(C) {
     HasReversedFunctionsWithBodies = false;
   }
   ~BitcodeReader() {
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index bc218b3..24b5e2d 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -490,8 +490,8 @@ static void WriteMDNode(const MDNode *N,
       Record.push_back(0);
     }
   }
-  unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE2 :
-                                           bitc::METADATA_NODE2;
+  unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
+                                           bitc::METADATA_NODE;
   Stream.EmitRecord(MDCode, Record, 0);
   Record.clear();
 }
@@ -554,7 +554,7 @@ static void WriteModuleMetadata(const Module *M,
     // Write named metadata operands.
     for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
       Record.push_back(VE.getValueID(NMD->getOperand(i)));
-    Stream.EmitRecord(bitc::METADATA_NAMED_NODE2, Record, 0);
+    Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
     Record.clear();
   }
 
@@ -590,7 +590,7 @@ static void WriteMetadataAttachment(const Function &F,
   SmallVector<uint64_t, 64> Record;
 
   // Write metadata attachments
-  // METADATA_ATTACHMENT2 - [m x [value, [n x [id, mdnode]]]
+  // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]
   SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
   
   for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -608,7 +608,7 @@ static void WriteMetadataAttachment(const Function &F,
         Record.push_back(MDs[i].first);
         Record.push_back(VE.getValueID(MDs[i].second));
       }
-      Stream.EmitRecord(bitc::METADATA_ATTACHMENT2, Record, 0);
+      Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
       Record.clear();
     }
 
@@ -1079,12 +1079,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
     AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
     break;
 
-  case Instruction::PHI:
+  case Instruction::PHI: {
+    const PHINode &PN = cast<PHINode>(I);
     Code = bitc::FUNC_CODE_INST_PHI;
-    Vals.push_back(VE.getTypeID(I.getType()));
-    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
-      Vals.push_back(VE.getValueID(I.getOperand(i)));
+    Vals.push_back(VE.getTypeID(PN.getType()));
+    for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+      Vals.push_back(VE.getValueID(PN.getIncomingValue(i)));
+      Vals.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+    }
     break;
+  }
 
   case Instruction::Alloca:
     Code = bitc::FUNC_CODE_INST_ALLOCA;
@@ -1103,7 +1107,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
     Vals.push_back(cast<LoadInst>(I).isVolatile());
     break;
   case Instruction::Store:
-    Code = bitc::FUNC_CODE_INST_STORE2;
+    Code = bitc::FUNC_CODE_INST_STORE;
     PushValueAndType(I.getOperand(1), InstID, Vals, VE);  // ptrty + ptr
     Vals.push_back(VE.getValueID(I.getOperand(0)));       // val.
     Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
@@ -1114,7 +1118,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
     const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
     const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
 
-    Code = bitc::FUNC_CODE_INST_CALL2;
+    Code = bitc::FUNC_CODE_INST_CALL;
 
     Vals.push_back(VE.getAttributeID(CI.getAttributes()));
     Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
@@ -1258,7 +1262,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
         Vals.push_back(DL.getCol());
         Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
         Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
-        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC2, Vals);
+        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
         Vals.clear();
         
         LastDL = DL;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b544ff1..bfee679 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1228,9 +1228,9 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
 /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
 /// global in the specified llvm.used list for which emitUsedDirectiveFor
 /// is true, as being used with this directive.
-void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
   // Should be an array of 'i8*'.
-  ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
   if (InitList == 0) return;
 
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
@@ -1243,11 +1243,11 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) {
 
 /// EmitXXStructorList - Emit the ctor or dtor list.  This just prints out the
 /// function pointers, ignoring the init priority.
-void AsmPrinter::EmitXXStructorList(Constant *List) {
+void AsmPrinter::EmitXXStructorList(const Constant *List) {
   // Should be an array of '{ int, void ()* }' structs.  The first value is the
   // init priority, which we ignore.
   if (!isa<ConstantArray>(List)) return;
-  ConstantArray *InitList = cast<ConstantArray>(List);
+  const ConstantArray *InitList = cast<ConstantArray>(List);
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
       if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
@@ -1533,6 +1533,13 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
                                      unsigned AddrSpace, AsmPrinter &AP) {
   for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
     EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+  const TargetData &TD = *AP.TM.getTargetData();
+  unsigned Size = TD.getTypeAllocSize(CV->getType());
+  unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+                         CV->getType()->getNumElements();
+  if (unsigned Padding = Size - EmittedSize)
+    AP.OutStreamer.EmitZeros(Padding, AddrSpace);
 }
 
 static void EmitGlobalConstantStruct(const ConstantStruct *CS,
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 967a278..1f992fa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -512,6 +512,8 @@ void DwarfException::EmitExceptionTable() {
     SizeAlign = 0;
   }
 
+  bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
   // SjLj Exception handling
   if (IsSJLJ) {
     Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
@@ -525,14 +527,30 @@ void DwarfException::EmitExceptionTable() {
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
       const CallSiteEntry &S = *I;
 
+      if (VerboseAsm) {
+        // Emit comments that decode the call site.
+        Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+                                    llvm::utostr(idx) + " <<");
+        Asm->OutStreamer.AddComment(Twine("  On exception at call site ") +
+                                    llvm::utostr(idx));
+
+        if (S.Action == 0)
+          Asm->OutStreamer.AddComment("  Action: cleanup");
+        else
+          Asm->OutStreamer.AddComment(Twine("  Action: ") +
+                                      llvm::utostr((S.Action - 1) / 2 + 1));
+
+        Asm->OutStreamer.AddBlankLine();
+      }
+
       // Offset of the landing pad, counted in 16-byte bundles relative to the
       // @LPStart address.
-      Asm->EmitULEB128(idx, "Landing pad");
+      Asm->EmitULEB128(idx);
 
       // Offset of the first associated action record, relative to the start of
       // the action table. This value is biased by 1 (1 indicates the start of
       // the action table), and 0 indicates that there are no actions.
-      Asm->EmitULEB128(S.Action, "Action");
+      Asm->EmitULEB128(S.Action);
     }
   } else {
     // DWARF Exception handling
@@ -562,6 +580,7 @@ void DwarfException::EmitExceptionTable() {
     // Add extra padding if it wasn't added to the TType base offset.
     Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
 
+    unsigned Entry = 0;
     for (SmallVectorImpl<CallSiteEntry>::const_iterator
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
       const CallSiteEntry &S = *I;
@@ -576,19 +595,38 @@ void DwarfException::EmitExceptionTable() {
       if (EndLabel == 0)
         EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
 
+      if (VerboseAsm) {
+        // Emit comments that decode the call site.
+        Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+                                    llvm::utostr(++Entry) + " <<");
+        Asm->OutStreamer.AddComment(Twine("  Call between ") +
+                                    BeginLabel->getName() + " and " +
+                                    EndLabel->getName());
+
+        if (!S.PadLabel) {
+          Asm->OutStreamer.AddComment("    has no landing pad");
+        } else {
+          Asm->OutStreamer.AddComment(Twine("    jumps to ") +
+                                      S.PadLabel->getName());
+
+          if (S.Action == 0)
+            Asm->OutStreamer.AddComment("  On action: cleanup");
+          else
+            Asm->OutStreamer.AddComment(Twine("  On action: ") +
+                                        llvm::utostr((S.Action - 1) / 2 + 1));
+        }
+
+        Asm->OutStreamer.AddBlankLine();
+      }
+
       // Offset of the call site relative to the previous call site, counted in
       // number of 16-byte bundles. The first call site is counted relative to
       // the start of the procedure fragment.
-      Asm->OutStreamer.AddComment("Region start");
       Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
-
-      Asm->OutStreamer.AddComment("Region length");
       Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
 
-
       // Offset of the landing pad, counted in 16-byte bundles relative to the
       // @LPStart address.
-      Asm->OutStreamer.AddComment("Landing pad");
       if (!S.PadLabel)
         Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
       else
@@ -597,45 +635,63 @@ void DwarfException::EmitExceptionTable() {
       // Offset of the first associated action record, relative to the start of
       // the action table. This value is biased by 1 (1 indicates the start of
       // the action table), and 0 indicates that there are no actions.
-      Asm->EmitULEB128(S.Action, "Action");
+      Asm->EmitULEB128(S.Action);
     }
   }
 
   // Emit the Action Table.
-  if (Actions.size() != 0) {
-    Asm->OutStreamer.AddComment("-- Action Record Table --");
-    Asm->OutStreamer.AddBlankLine();
-  }
-
+  int Entry = 0;
   for (SmallVectorImpl<ActionEntry>::const_iterator
          I = Actions.begin(), E = Actions.end(); I != E; ++I) {
     const ActionEntry &Action = *I;
-    Asm->OutStreamer.AddComment("Action Record");
-    Asm->OutStreamer.AddBlankLine();
+
+    if (VerboseAsm) {
+      // Emit comments that decode the action table.
+      Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
+                                  llvm::utostr(++Entry) + " <<");
+      if (Action.ValueForTypeID >= 0)
+        Asm->OutStreamer.AddComment(Twine("  Catch TypeInfo ") +
+                                    llvm::itostr(Action.ValueForTypeID));
+      else 
+        Asm->OutStreamer.AddComment(Twine("  Filter TypeInfo ") +
+                                    llvm::itostr(Action.ValueForTypeID));
+
+      if (Action.NextAction == 0) {
+        Asm->OutStreamer.AddComment("  No further actions");
+      } else {
+        unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+        Asm->OutStreamer.AddComment(Twine("  Continue to action ") +
+                                    llvm::utostr(NextAction));
+      }
+
+      Asm->OutStreamer.AddBlankLine();
+    }
 
     // Type Filter
     //
     //   Used by the runtime to match the type of the thrown exception to the
     //   type of the catch clauses or the types in the exception specification.
-    Asm->EmitSLEB128(Action.ValueForTypeID, "  TypeInfo index");
+    Asm->EmitSLEB128(Action.ValueForTypeID);
 
     // Action Record
     //
     //   Self-relative signed displacement in bytes of the next action record,
     //   or 0 if there is no next action record.
-    Asm->EmitSLEB128(Action.NextAction, "  Next action");
+    Asm->EmitSLEB128(Action.NextAction);
   }
 
   // Emit the Catch TypeInfos.
-  if (!TypeInfos.empty()) {
-    Asm->OutStreamer.AddComment("-- Catch TypeInfos --");
+  if (VerboseAsm && !TypeInfos.empty()) {
+    Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
     Asm->OutStreamer.AddBlankLine();
+    Entry = TypeInfos.size();
   }
+
   for (std::vector<const GlobalVariable *>::const_reverse_iterator
          I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
     const GlobalVariable *GV = *I;
-
-    Asm->OutStreamer.AddComment("TypeInfo");
+    if (VerboseAsm)
+      Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
     if (GV)
       Asm->EmitReference(GV, TTypeEncoding);
     else
@@ -644,14 +700,21 @@ void DwarfException::EmitExceptionTable() {
   }
 
   // Emit the Exception Specifications.
-  if (!FilterIds.empty()) {
-    Asm->OutStreamer.AddComment("-- Filter IDs --");
+  if (VerboseAsm && !FilterIds.empty()) {
+    Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
     Asm->OutStreamer.AddBlankLine();
+    Entry = 0;
   }
   for (std::vector<unsigned>::const_iterator
          I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
     unsigned TypeID = *I;
-    Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
+    if (VerboseAsm) {
+      --Entry;
+      if (TypeID != 0)
+        Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+    }
+
+    Asm->EmitULEB128(TypeID);
   }
 
   Asm->EmitAlignment(2);
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index fa2319b..d977651 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -659,11 +659,11 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
 
 /// EmitXXStructorList - Emit the ctor or dtor list.  This just emits out the 
 /// function pointers, ignoring the init priority.
-void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) {
+void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
   // Should be an array of '{ i32, void ()* }' structs.  The first value is the
   // init priority, which we ignore.
   if (List->isNullValue()) return;
-  ConstantArray *InitList = cast<ConstantArray>(List);
+  const ConstantArray *InitList = cast<ConstantArray>(List);
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
     if (InitList->getOperand(i)->isNullValue())
       continue;
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index b8bac55..6f7fbac 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -232,7 +232,7 @@ namespace llvm {
     void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, 
                                   ELFSection &GblS, int64_t Offset = 0);
     bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
-    void EmitXXStructorList(Constant *List, ELFSection &Xtor);
+    void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
     void EmitRelocations();
     void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
     void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 613f0c4..c0f71d2 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -768,6 +768,9 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
 /// getSuccWeight - Return weight of the edge from this block to MBB.
 ///
 uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+  if (Weights.empty())
+    return 0;
+
   succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
   return *getWeightIterator(I);
 }
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 65ebdf8..0f27dfc 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -86,7 +86,7 @@ namespace {
     // that is currently available in a physical register.
     LiveRegMap LiveVirtRegs;
 
-    DenseMap<unsigned, MachineInstr *> LiveDbgValueMap;
+    DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
 
     // RegState - Track the state of a physical register.
     enum RegState {
@@ -272,7 +272,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
     // If this register is used by DBG_VALUE then insert new DBG_VALUE to
     // identify spilled location as the place to find corresponding variable's
     // value.
-    if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) {
+    SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+    for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+      MachineInstr *DBG = LRIDbgValues[li];
       const MDNode *MDPtr =
         DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
       int64_t Offset = 0;
@@ -291,9 +293,11 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
         MachineBasicBlock *MBB = DBG->getParent();
         MBB->insert(MI, NewDV);
         DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
-        LiveDbgValueMap[LRI->first] = NewDV;
       }
     }
+    // Now this register is spilled there is should not be any DBG_VALUE pointing
+    // to this register because they are all pointing to spilled value now.
+    LRIDbgValues.clear();
     if (SpillKill)
       LR.LastUse = 0; // Don't kill register again
   }
@@ -816,7 +820,7 @@ void RAFast::AllocateBasicBlock() {
           if (!MO.isReg()) continue;
           unsigned Reg = MO.getReg();
           if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
-          LiveDbgValueMap[Reg] = MI;
+          LiveDbgValueMap[Reg].push_back(MI);
           LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
           if (LRI != LiveVirtRegs.end())
             setPhysReg(MI, i, LRI->second.PhysReg);
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 0818034..7a2ea6c 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -1440,8 +1440,12 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
   if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
     physReg = vrm_->getPhys(physReg);
 
-  ArrayRef<unsigned> Order = tri_->getRawAllocationOrder(RC, Hint.first,
-                                                         physReg, *mf_);
+  ArrayRef<unsigned> Order;
+  if (Hint.first)
+    Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
+  else
+    Order = RegClassInfo.getOrder(RC);
+
   assert(!Order.empty() && "No allocatable register in this register class!");
 
   // Scan for the first available register.
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 605507f..49f8fb4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -84,8 +84,8 @@ public:
   static char ID;
 
   /// Construct a PBQP register allocator.
-  RegAllocPBQP(std::auto_ptr<PBQPBuilder> b)
-      : MachineFunctionPass(ID), builder(b) {
+  RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
+      : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
     initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
     initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
@@ -122,6 +122,8 @@ private:
 
   std::auto_ptr<PBQPBuilder> builder;
 
+  char *customPassID;
+
   MachineFunction *mf;
   const TargetMachine *tm;
   const TargetRegisterInfo *tri;
@@ -449,6 +451,8 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
   au.addRequired<LiveIntervals>();
   //au.addRequiredID(SplitCriticalEdgesID);
   au.addRequired<RegisterCoalescer>();
+  if (customPassID)
+    au.addRequiredID(*customPassID);
   au.addRequired<CalculateSpillWeights>();
   au.addRequired<LiveStacks>();
   au.addPreserved<LiveStacks>();
@@ -702,8 +706,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
 }
 
 FunctionPass* llvm::createPBQPRegisterAllocator(
-                                           std::auto_ptr<PBQPBuilder> builder) {
-  return new RegAllocPBQP(builder);
+                                           std::auto_ptr<PBQPBuilder> builder,
+                                           char *customPassID) {
+  return new RegAllocPBQP(builder, customPassID);
 }
 
 FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4ac590a..e3d3906 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -238,6 +238,9 @@ namespace {
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     SDValue BuildSDIV(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
+    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+                               bool DemandHighBits = true);
+    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
@@ -2512,6 +2515,244 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   return SDValue();
 }
 
+/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+                                        bool DemandHighBits) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+    return SDValue();
+  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+    return SDValue();
+
+  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+  bool LookPassAnd0 = false;
+  bool LookPassAnd1 = false;
+  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+      std::swap(N0, N1);
+  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+      std::swap(N0, N1);
+  if (N0.getOpcode() == ISD::AND) {
+    if (!N0.getNode()->hasOneUse())
+      return SDValue();
+    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!N01C || N01C->getZExtValue() != 0xFF00)
+      return SDValue();
+    N0 = N0.getOperand(0);
+    LookPassAnd0 = true;
+  }
+
+  if (N1.getOpcode() == ISD::AND) {
+    if (!N1.getNode()->hasOneUse())
+      return SDValue();
+    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!N11C || N11C->getZExtValue() != 0xFF)
+      return SDValue();
+    N1 = N1.getOperand(0);
+    LookPassAnd1 = true;
+  }
+
+  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+    std::swap(N0, N1);
+  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+    return SDValue();
+  if (!N0.getNode()->hasOneUse() ||
+      !N1.getNode()->hasOneUse())
+    return SDValue();
+
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+  if (!N01C || !N11C)
+    return SDValue();
+  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+    return SDValue();
+
+  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+  SDValue N00 = N0->getOperand(0);
+  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+    if (!N00.getNode()->hasOneUse())
+      return SDValue();
+    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+    if (!N001C || N001C->getZExtValue() != 0xFF)
+      return SDValue();
+    N00 = N00.getOperand(0);
+    LookPassAnd0 = true;
+  }
+
+  SDValue N10 = N1->getOperand(0);
+  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+    if (!N10.getNode()->hasOneUse())
+      return SDValue();
+    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+    if (!N101C || N101C->getZExtValue() != 0xFF00)
+      return SDValue();
+    N10 = N10.getOperand(0);
+    LookPassAnd1 = true;
+  }
+
+  if (N00 != N10)
+    return SDValue();
+
+  // Make sure everything beyond the low halfword is zero since the SRL 16
+  // will clear the top bits.
+  unsigned OpSizeInBits = VT.getSizeInBits();
+  if (DemandHighBits && OpSizeInBits > 16 &&
+      (!LookPassAnd0 || !LookPassAnd1) &&
+      !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
+    return SDValue();
+  
+  SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+  if (OpSizeInBits > 16)
+    Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+  return Res;
+}
+
+/// isBSwapHWordElement - Return true if the specified node is an element
+/// that makes up a 32-bit packed halfword byteswap. i.e.
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+  if (!N.getNode()->hasOneUse())
+    return false;
+
+  unsigned Opc = N.getOpcode();
+  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+    return false;
+
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  if (!N1C)
+    return false;
+
+  unsigned Num;
+  switch (N1C->getZExtValue()) {
+  default:
+    return false;
+  case 0xFF:       Num = 0; break;
+  case 0xFF00:     Num = 1; break;
+  case 0xFF0000:   Num = 2; break;
+  case 0xFF000000: Num = 3; break;
+  }
+
+  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+  SDValue N0 = N.getOperand(0);
+  if (Opc == ISD::AND) {
+    if (Num == 0 || Num == 2) {
+      // (x >> 8) & 0xff
+      // (x >> 8) & 0xff0000
+      if (N0.getOpcode() != ISD::SRL)
+        return false;
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+      if (!C || C->getZExtValue() != 8)
+        return false;
+    } else {
+      // (x << 8) & 0xff00
+      // (x << 8) & 0xff000000
+      if (N0.getOpcode() != ISD::SHL)
+        return false;
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+      if (!C || C->getZExtValue() != 8)
+        return false;
+    }
+  } else if (Opc == ISD::SHL) {
+    // (x & 0xff) << 8
+    // (x & 0xff0000) << 8
+    if (Num != 0 && Num != 2)
+      return false;
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    if (!C || C->getZExtValue() != 8)
+      return false;
+  } else { // Opc == ISD::SRL
+    // (x & 0xff00) >> 8
+    // (x & 0xff000000) >> 8
+    if (Num != 1 && Num != 3)
+      return false;
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    if (!C || C->getZExtValue() != 8)
+      return false;
+  }
+
+  if (Parts[Num])
+    return false;
+
+  Parts[Num] = N0.getOperand(0).getNode();
+  return true;
+}
+
+/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32)
+    return SDValue();
+  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+    return SDValue();
+
+  SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+  // Look for either
+  // (or (or (and), (and)), (or (and), (and)))
+  // (or (or (or (and), (and)), (and)), (and))
+  if (N0.getOpcode() != ISD::OR)
+    return SDValue();
+  SDValue N00 = N0.getOperand(0);
+  SDValue N01 = N0.getOperand(1);
+
+  if (N1.getOpcode() == ISD::OR) {
+    // (or (or (and), (and)), (or (and), (and)))
+    SDValue N000 = N00.getOperand(0);
+    if (!isBSwapHWordElement(N000, Parts))
+      return SDValue();
+
+    SDValue N001 = N00.getOperand(1);
+    if (!isBSwapHWordElement(N001, Parts))
+      return SDValue();
+    SDValue N010 = N01.getOperand(0);
+    if (!isBSwapHWordElement(N010, Parts))
+      return SDValue();
+    SDValue N011 = N01.getOperand(1);
+    if (!isBSwapHWordElement(N011, Parts))
+      return SDValue();
+  } else {
+    // (or (or (or (and), (and)), (and)), (and))
+    if (!isBSwapHWordElement(N1, Parts))
+      return SDValue();
+    if (!isBSwapHWordElement(N01, Parts))
+      return SDValue();
+    if (N00.getOpcode() != ISD::OR)
+      return SDValue();
+    SDValue N000 = N00.getOperand(0);
+    if (!isBSwapHWordElement(N000, Parts))
+      return SDValue();
+    SDValue N001 = N00.getOperand(1);
+    if (!isBSwapHWordElement(N001, Parts))
+      return SDValue();
+  }
+
+  // Make sure the parts are all coming from the same node.
+  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+    return SDValue();
+
+  SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+                              SDValue(Parts[0],0));
+
+  // Result of the bswap should be rotated by 16. If it's not legal, than
+  // do  (x << 16) | (x >> 16).
+  SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+    return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+  else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+    return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
+  return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
+                     DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
+                     DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+}
+
 SDValue DAGCombiner::visitOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2547,6 +2788,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   // fold (or x, c) -> c iff (x & ~c) == 0
   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
     return N1;
+
+  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
+  if (BSwap.getNode() != 0)
+    return BSwap;
+  BSwap = MatchBSwapHWordLow(N, N0, N1);
+  if (BSwap.getNode() != 0)
+    return BSwap;
+
   // reassociate or
   SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
   if (ROR.getNode() != 0)
@@ -4606,6 +4856,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   }
+
+  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+                                       N0.getOperand(1), false);
+    if (BSwap.getNode() != 0)
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                         BSwap, N1);
+  }
+
   return SDValue();
 }
 
@@ -5231,7 +5491,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   // fold (sint_to_fp c1) -> c1fp
   if (N0C && OpVT != MVT::ppcf128 &&
       // ...but only if the target supports immediate floating-point values
-      (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+      (Level == llvm::Unrestricted ||
+       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
     return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
 
   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -5255,7 +5516,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   // fold (uint_to_fp c1) -> c1fp
   if (N0C && OpVT != MVT::ppcf128 &&
       // ...but only if the target supports immediate floating-point values
-      (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+      (Level == llvm::Unrestricted ||
+       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
     return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
 
   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -7208,7 +7470,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         const TargetData &TD = *TLI.getTargetData();
 
         // Create a ConstantArray of the two constants.
-        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
         SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
                                             TD.getPrefTypeAlignment(FPTy));
         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index da75b8a..e7c77dd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,6 +19,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LegalizeTypes.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -191,10 +192,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
     if (NOutVT.bitsEq(NInVT))
       // The input promotes to the same size.  Convert the promoted value.
       return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
-    if (NInVT.isVector())
-      // Promote vector element via memory load/store.
-      return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
-                         CreateStackStoreLoad(InOp, OutVT));
     break;
   case TargetLowering::TypeSoftenFloat:
     // Promote the integer operand by hand.
@@ -341,8 +338,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
   // (eg: because the value being converted is too big), then the result of the
   // original operation was undefined anyway, so the assert is still correct.
   return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
-                     ISD::AssertZext : ISD::AssertSext, dl,
-                     NVT, Res, DAG.getValueType(N->getValueType(0)));
+                     ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+                     DAG.getValueType(N->getValueType(0).getScalarType()));
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
@@ -372,7 +369,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
         return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
                            DAG.getValueType(N->getOperand(0).getValueType()));
       if (N->getOpcode() == ISD::ZERO_EXTEND)
-        return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+        return DAG.getZeroExtendInReg(Res, dl,
+                      N->getOperand(0).getValueType().getScalarType());
       assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
       return Res;
     }
@@ -522,20 +520,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
 SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Res;
+  SDValue InOp = N->getOperand(0);
+  DebugLoc dl = N->getDebugLoc();
 
-  switch (getTypeAction(N->getOperand(0).getValueType())) {
+  switch (getTypeAction(InOp.getValueType())) {
   default: llvm_unreachable("Unknown type action!");
   case TargetLowering::TypeLegal:
   case TargetLowering::TypeExpandInteger:
-    Res = N->getOperand(0);
+    Res = InOp;
     break;
   case TargetLowering::TypePromoteInteger:
-    Res = GetPromotedInteger(N->getOperand(0));
+    Res = GetPromotedInteger(InOp);
     break;
+  case TargetLowering::TypeSplitVector:
+    EVT InVT = InOp.getValueType();
+    assert(InVT.isVector() && "Cannot split scalar types");
+    unsigned NumElts = InVT.getVectorNumElements();
+    assert(NumElts == NVT.getVectorNumElements() &&
+           "Dst and Src must have the same number of elements");
+    EVT EltVT = InVT.getScalarType();
+    assert(isPowerOf2_32(NumElts) &&
+           "Promoted vector type must be a power of two");
+
+    EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+    EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+                                   NumElts/2);
+
+    SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+                               DAG.getIntPtrConstant(0));
+    SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+                               DAG.getIntPtrConstant(NumElts/2));
+    EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+    EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
   }
 
   // Truncate to NVT instead of VT
-  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+  return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
@@ -1072,6 +1094,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
   case ISD::UADDO:
   case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+  case ISD::UMULO:
+  case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2149,6 +2173,86 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
   ReplaceValueWith(SDValue(N, 1), Ofl);
 }
 
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  const Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+  EVT PtrVT = TLI.getPointerTy();
+  const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+  DebugLoc dl = N->getDebugLoc();
+
+  // A divide for UMULO should be faster than a function call.
+  if (N->getOpcode() == ISD::UMULO) {
+    SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+    DebugLoc DL = N->getDebugLoc();
+
+    SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+    SplitInteger(MUL, Lo, Hi);
+
+    // A divide for UMULO will be faster than a function call. Select to
+    // make sure we aren't using 0.
+    SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+				  RHS, DAG.getConstant(0, VT), ISD::SETNE);
+    SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+				  DAG.getConstant(1, VT), RHS);
+    SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
+    SDValue Overflow;
+    Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+    ReplaceValueWith(SDValue(N, 1), Overflow);
+    return;
+  }
+
+  // Replace this with a libcall that will check overflow.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i32)
+    LC = RTLIB::MULO_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::MULO_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::MULO_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+  SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+  // Temporary for the overflow value, default it to zero.
+  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+			       DAG.getConstant(0, PtrVT), Temp,
+			       MachinePointerInfo(), false, false, 0);
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = N->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = N->getOperand(i);
+    Entry.Ty = ArgTy;
+    Entry.isSExt = true;
+    Entry.isZExt = false;
+    Args.push_back(Entry);
+  }
+
+  // Also pass the address of the overflow check.
+  Entry.Node = Temp;
+  Entry.Ty = PtrTy->getPointerTo();
+  Entry.isSExt = true;
+  Entry.isZExt = false;
+  Args.push_back(Entry);
+
+  SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
+		    0, TLI.getLibcallCallingConv(LC), false,
+		    true, Func, Args, DAG, dl);
+
+  SplitInteger(CallInfo.first, Lo, Hi);
+  SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+			      MachinePointerInfo(), false, false, 0);
+  SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+                             DAG.getConstant(0, PtrVT),
+                             ISD::SETNE);
+  // Use the overflow from the libcall everywhere.
+  ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
@@ -2641,18 +2745,18 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
 SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
   SDValue InOp0 = N->getOperand(0);
   EVT InVT = InOp0.getValueType();
-  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
 
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   assert(NOutVT.isVector() && "This type must be promoted to a vector type");
-  unsigned OutNumElems = N->getValueType(0).getVectorNumElements();
+  unsigned OutNumElems = OutVT.getVectorNumElements();
   EVT NOutVTElem = NOutVT.getVectorElementType();
 
   DebugLoc dl = N->getDebugLoc();
   SDValue BaseIdx = N->getOperand(1);
 
   SmallVector<SDValue, 8> Ops;
+  Ops.reserve(OutNumElems);
   for (unsigned i = 0; i != OutNumElems; ++i) {
 
     // Extract the element from the original vector.
@@ -2684,18 +2788,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
 
   SDValue V0 = GetPromotedInteger(N->getOperand(0));
   SDValue V1 = GetPromotedInteger(N->getOperand(1));
-  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  EVT OutVT = V0.getValueType();
 
-  return DAG.getVectorShuffle(OutVT, dl, V0,V1, &NewMask[0]);
+  return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
 }
 
 
 SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
-
-  SDValue InOp0 = N->getOperand(0);
-  EVT InVT = InOp0.getValueType();
-  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
-
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   assert(NOutVT.isVector() && "This type must be promoted to a vector type");
@@ -2705,6 +2804,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
 
   SmallVector<SDValue, 8> Ops;
+  Ops.reserve(NumElems);
   for (unsigned i = 0; i != NumElems; ++i) {
     SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
     Ops.push_back(Op);
@@ -2717,10 +2817,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
 
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue InOp0 = N->getOperand(0);
-  EVT InVT = InOp0.getValueType();
-  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
-  assert(!InVT.isVector() && "Input must not be a scalar");
+  assert(!N->getOperand(0).getValueType().isVector() &&
+         "Input must be a scalar");
 
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -2733,12 +2831,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
-
-  SDValue InOp0 = N->getOperand(0);
-  EVT InVT = InOp0.getValueType();
-  EVT InElVT = InVT.getVectorElementType();
-  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
-
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   assert(NOutVT.isVector() && "This type must be promoted to a vector type");
@@ -2747,7 +2839,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
 
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0);
+  SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+                                        N->getOperand(0));
 
   SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
     NOutVTElem, N->getOperand(1));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 06dc40f..4597ec9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -318,6 +318,7 @@ private:
 
   void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_XMULO             (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandShiftByConstant(SDNode *N, unsigned Amt,
                              SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index f09b381..a827187 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -290,7 +290,17 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
   // Special handling for untyped values.  These values can only come from
   // the expansion of custom DAG-to-DAG patterns.
   if (VT == MVT::untyped) {
-    unsigned Opcode = RegDefPos.GetNode()->getMachineOpcode();
+    const SDNode *Node = RegDefPos.GetNode();
+    unsigned Opcode = Node->getMachineOpcode();
+
+    if (Opcode == TargetOpcode::REG_SEQUENCE) {
+      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+      RegClass = RC->getID();
+      Cost = 1;
+      return;
+    }
+
     unsigned Idx = RegDefPos.GetIdx();
     const TargetInstrDesc Desc = TII->get(Opcode);
     const TargetRegisterClass *RC = Desc.getRegClass(Idx, TRI);
@@ -1833,8 +1843,6 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
     }
     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
          RegDefPos.IsValid(); RegDefPos.Advance()) {
-      EVT VT = RegDefPos.GetValue();
-
       unsigned RCId, Cost;
       GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 0d656ef..dbc623b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -649,7 +649,7 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
   // order number right after the N.
   MachineBasicBlock *BB = Emitter.getBlock();
   MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
-  SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
+  ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
   for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
     if (DVs[i]->isInvalidated())
       continue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 68eeb60..77ce54f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -598,7 +598,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
   Ordering->remove(N);
 
   // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
-  SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N);
+  ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
   for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
     DbgVals[i]->setIsInvalidated();
 }
@@ -5508,9 +5508,9 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
     return;
   SDNode *FromNode = From.getNode();
   SDNode *ToNode = To.getNode();
-  SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode);
+  ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
   SmallVector<SDDbgValue *, 2> ClonedDVs;
-  for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end();
+  for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
        I != E; ++I) {
     SDDbgValue *Dbg = *I;
     if (Dbg->getKind() == SDDbgValue::SDNODE) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d79a5ae..19bfa33 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -475,17 +475,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
                PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
 
       // Promoted vector extract
-      unsigned NumElts = ValueVT.getVectorNumElements();
-      SmallVector<SDValue, 8> NewOps;
-      for (unsigned i = 0; i < NumElts; ++i) {
-        SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-                       ValueVT.getScalarType(), Val ,DAG.getIntPtrConstant(i));
-        SDValue Cast = DAG.getNode(ISD::ANY_EXTEND,
-                       DL, PartVT.getScalarType(), Ext);
-        NewOps.push_back(Cast);
-      }
-      Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT,
-                        &NewOps[0], NewOps.size());
+      bool Smaller = PartVT.bitsLE(ValueVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                        DL, PartVT, Val);
     } else{
       // Vector -> scalar conversion.
       assert(ValueVT.getVectorNumElements() == 1 &&
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index efbfaa4..474dd7a 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::MUL_I32] = "__mulsi3";
   Names[RTLIB::MUL_I64] = "__muldi3";
   Names[RTLIB::MUL_I128] = "__multi3";
+  Names[RTLIB::MULO_I32] = "__mulosi4";
+  Names[RTLIB::MULO_I64] = "__mulodi4";
+  Names[RTLIB::MULO_I128] = "__muloti4";
   Names[RTLIB::SDIV_I8] = "__divqi3";
   Names[RTLIB::SDIV_I16] = "__divhi3";
   Names[RTLIB::SDIV_I32] = "__divsi3";
@@ -1914,7 +1917,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   // comparisons.
   if (isa<ConstantSDNode>(N0.getNode()))
     return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
-  
+
   if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
 
@@ -2673,9 +2676,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                   std::string &Constraint,
                                                   std::vector<SDValue> &Ops,
                                                   SelectionDAG &DAG) const {
-  
+
   if (Constraint.length() > 1) return;
-  
+
   char ConstraintLetter = Constraint[0];
   switch (ConstraintLetter) {
   default: break;
@@ -2865,7 +2868,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
           report_fatal_error("Indirect operand for inline asm not a pointer!");
         OpTy = PtrTy->getElementType();
       }
-      
+
       // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
       if (const StructType *STy = dyn_cast<StructType>(OpTy))
         if (STy->getNumElements() == 1)
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 6ab0cb0..ff58b22 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -194,27 +194,31 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
 
   // Truncate the ShadowStackDescriptor if some metadata is null.
   unsigned NumMeta = 0;
-  SmallVector<Constant*,16> Metadata;
+  SmallVector<Constant*, 16> Metadata;
   for (unsigned I = 0; I != Roots.size(); ++I) {
     Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
     if (!C->isNullValue())
       NumMeta = I + 1;
     Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
   }
+  Metadata.resize(NumMeta);
 
+  const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+  
   Constant *BaseElts[] = {
-    ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false),
-    ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false),
+    ConstantInt::get(Int32Ty, Roots.size(), false),
+    ConstantInt::get(Int32Ty, NumMeta, false),
   };
 
   Constant *DescriptorElts[] = {
-    ConstantStruct::get(F.getContext(), BaseElts, 2, false),
-    ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
-                       Metadata.begin(), NumMeta)
+    ConstantStruct::get(StructType::get(Int32Ty, Int32Ty, NULL), BaseElts),
+    ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
   };
 
-  Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2,
-                                           false);
+  Constant *FrameMap =
+    ConstantStruct::get(StructType::get(DescriptorElts[0]->getType(),
+                                        DescriptorElts[1]->getType(), NULL),
+                        DescriptorElts);
 
   std::string TypeName("gc_map.");
   TypeName += utostr(NumMeta);
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 92970e4..c2565af 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -91,8 +91,7 @@ bool SjLjEHPass::doInitialization(Module &M) {
           Type::getInt8PtrTy(M.getContext());
   const Type *Int32Ty = Type::getInt32Ty(M.getContext());
   FunctionContextTy =
-    StructType::get(M.getContext(),
-                    VoidPtrTy,                        // __prev
+    StructType::get(VoidPtrTy,                        // __prev
                     Int32Ty,                          // call_site
                     ArrayType::get(Int32Ty, 4),       // __data
                     VoidPtrTy,                        // __personality
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 90cb72f..e5c23b3 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -96,6 +96,12 @@ namespace {
     bool TailDuplicateBlocks(MachineFunction &MF);
     bool shouldTailDuplicate(const MachineFunction &MF,
                              MachineBasicBlock &TailBB);
+    bool isSimpleBB(MachineBasicBlock *TailBB);
+    bool canCompletelyDuplicateSimpleBB(MachineBasicBlock &BB);
+    bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+                           SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                           const DenseSet<unsigned> &RegsUsedByPhi,
+                           SmallVector<MachineInstr*, 16> &Copies);
     bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
                        SmallVector<MachineBasicBlock*, 8> &TDBBs,
                        SmallVector<MachineInstr*, 16> &Copies);
@@ -201,7 +207,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
       // TailBB's immediate successors are now successors of those predecessors
       // which duplicated TailBB. Add the predecessors as sources to the PHI
       // instructions.
-      bool isDead = MBB->pred_empty();
+      bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
       if (PreRegAlloc)
         UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
 
@@ -242,6 +248,14 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
             MachineOperand &UseMO = UI.getOperand();
             MachineInstr *UseMI = &*UI;
             ++UI;
+            if (UseMI->isDebugValue()) {
+              // SSAUpdate can replace the use with an undef. That creates
+              // a debug instruction that is a kill.
+              // FIXME: Should it SSAUpdate job to delete debug instructions
+              // instead of replacing the use with undef?
+              UseMI->eraseFromParent();
+              continue;
+            }
             if (UseMI->getParent() == DefBB && !UseMI->isPHI())
               continue;
             SSAUpdate.RewriteUse(UseMO);
@@ -283,6 +297,8 @@ static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
          UE = MRI->use_end(); UI != UE; ++UI) {
     MachineInstr *UseMI = &*UI;
+    if (UseMI->isDebugValue())
+      continue;
     if (UseMI->getParent() != BB)
       return true;
   }
@@ -547,6 +563,141 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
   return true;
 }
 
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+  if (TailBB->succ_size() != 1)
+    return false;
+  MachineBasicBlock::iterator I = TailBB->begin();
+  MachineBasicBlock::iterator E = TailBB->end();
+  while (I != E && I->isDebugValue())
+    ++I;
+  if (I == E)
+    return true;
+  return I->getDesc().isUnconditionalBranch();
+}
+
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+              SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+  for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+         SE = A.succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock *BB = *SI;
+    if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+      return true;
+  }
+
+  return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateSimpleBB(MachineBasicBlock &BB) {
+  SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+  for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+       PE = BB.pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+    if (PredBB->getLandingPadSuccessor())
+      return false;
+    if (bothUsedInPHI(*PredBB, Succs))
+      return false;
+    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      return false;
+  }
+  return true;
+}
+
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+                                     SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                     const DenseSet<unsigned> &UsedByPhi,
+                                     SmallVector<MachineInstr*, 16> &Copies) {
+  if (!canCompletelyDuplicateSimpleBB(*TailBB))
+    return false;
+
+  bool Changed = false;
+  SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+                                           TailBB->pred_end());
+  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+       PE = Preds.end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+    SmallVector<MachineOperand, 4> PredCond;
+    bool NotAnalyzable =
+      TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+    (void)NotAnalyzable;
+    assert(!NotAnalyzable && "Cannot duplicate this!");
+
+    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+                 << "From simple Succ: " << *TailBB);
+
+    MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+    MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+    DenseMap<unsigned, unsigned> LocalVRMap;
+    SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+    for (MachineBasicBlock::iterator I = TailBB->begin();
+         I != TailBB->end() && I->isPHI();) {
+      MachineInstr *MI = &*I;
+      ++I;
+      ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+    }
+    MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+    for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+      Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+                               TII->get(TargetOpcode::COPY),
+                               CopyInfos[i].first).addReg(CopyInfos[i].second));
+    }
+
+    // Make PredFBB explicit.
+    if (PredCond.empty())
+      PredFBB = PredTBB;
+
+    // Make fall through explicit.
+    if (!PredTBB)
+      PredTBB = NextBB;
+    if (!PredFBB)
+      PredFBB = NextBB;
+
+    // Redirect
+    if (PredFBB == TailBB)
+      PredFBB = NewTarget;
+    if (PredTBB == TailBB)
+      PredTBB = NewTarget;
+
+    // Make the branch unconditional if possible
+    if (PredTBB == PredFBB) {
+      PredCond.clear();
+      PredFBB = NULL;
+    }
+
+    // Avoid adding fall through branches.
+    if (PredFBB == NextBB)
+      PredFBB = NULL;
+    if (PredTBB == NextBB && PredFBB == NULL)
+      PredTBB = NULL;
+
+    TII->RemoveBranch(*PredBB);
+
+    if (PredTBB)
+      TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+    PredBB->removeSuccessor(TailBB);
+    unsigned NumSuccessors = PredBB->succ_size();
+    assert(NumSuccessors <= 1);
+    if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+      PredBB->addSuccessor(NewTarget);
+
+    TDBBs.push_back(PredBB);
+
+    Changed = true;
+  }
+  return Changed;
+}
+
 /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
 /// of its predecessors.
 bool
@@ -558,14 +709,18 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
 
   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
 
+  DenseSet<unsigned> UsedByPhi;
+  getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+  if (isSimpleBB(TailBB))
+    return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
   // Iterate through all the unique predecessors and tail-duplicate this
   // block into them, if possible. Copying the list ahead of time also
   // avoids trouble with the predecessor list reallocating.
   bool Changed = false;
   SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
                                               TailBB->pred_end());
-  DenseSet<unsigned> UsedByPhi;
-  getRegsUsedByPHIs(*TailBB, &UsedByPhi);
   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
        PE = Preds.end(); PI != PE; ++PI) {
     MachineBasicBlock *PredBB = *PI;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2da1bd4..df42b2c 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -487,8 +487,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
 
   // .comm doesn't support alignment before Leopard.
   Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
-  if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
-    CommDirectiveSupportsAlignment = false;
+  if (T.isMacOSX()) {
+    if (T.isMacOSXVersionLT(10, 5))
+      CommDirectiveSupportsAlignment = false;
+    if (!T.isMacOSXVersionLT(10, 6))
+      SupportsCompactUnwindInfo = true;
+  }
 
   TargetLoweringObjectFile::Initialize(Ctx, TM);
 
@@ -605,6 +609,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   // Exception Handling.
   LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
                                              SectionKind::getReadOnlyWithRel());
+
+  CompactUnwindSection =
+    getContext().getMachOSection("__LD", "__compact_unwind",
+                                 MCSectionMachO::S_ATTR_DEBUG,
+                                 SectionKind::getReadOnly());
+
   // Debug Information.
   DwarfAbbrevSection =
     getContext().getMachOSection("__DWARF", "__debug_abbrev",
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index f54d879..3860e0b 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1095,12 +1095,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
                "two address instruction invalid");
 
         unsigned regB = mi->getOperand(SrcIdx).getReg();
-        TiedOperandMap::iterator OI = TiedOperands.find(regB);
-        if (OI == TiedOperands.end()) {
-          SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
-          OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
-        }
-        OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
+        TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
       }
 
       // Now iterate over the information collected above.
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 062256a..fe1f7fc 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -282,10 +282,10 @@ GenericValue Interpreter::callExternalFunction(Function *F,
 
   if (F->getName() == "__main")
     errs() << "Tried to execute an unknown external function: "
-      << F->getType()->getDescription() << " __main\n";
+      << *F->getType() << " __main\n";
   else
     report_fatal_error("Tried to execute an unknown external function: " +
-                      F->getType()->getDescription() + " " +F->getName());
+                       F->getName());
 #ifndef USE_LIBFFI
   errs() << "Recompiling LLVM with --enable-libffi might help.\n";
 #endif
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e8b09fc..acf7755 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -33,8 +34,10 @@ using namespace llvm;
 namespace {
 
 class MCAsmStreamer : public MCStreamer {
+protected:
   formatted_raw_ostream &OS;
   const MCAsmInfo &MAI;
+private:
   OwningPtr<MCInstPrinter> InstPrinter;
   OwningPtr<MCCodeEmitter> Emitter;
   OwningPtr<TargetAsmBackend> AsmBackend;
@@ -1241,13 +1244,12 @@ void MCAsmStreamer::Finish() {
   if (!UseCFI)
     EmitFrames(false);
 }
-
 MCStreamer *llvm::createAsmStreamer(MCContext &Context,
                                     formatted_raw_ostream &OS,
                                     bool isVerboseAsm, bool useLoc,
-                                    bool useCFI,
-                                    MCInstPrinter *IP, MCCodeEmitter *CE,
-                                    TargetAsmBackend *TAB, bool ShowInst) {
+                                    bool useCFI, MCInstPrinter *IP,
+                                    MCCodeEmitter *CE, TargetAsmBackend *TAB,
+                                    bool ShowInst) {
   return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
                            IP, CE, TAB, ShowInst);
 }
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 13cb81a..937dad4 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -354,10 +354,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
       OS << char(dwarf::DW_LNS_const_add_pc);
     else {
       OS << char(dwarf::DW_LNS_advance_pc);
-      SmallString<32> Tmp;
-      raw_svector_ostream OSE(Tmp);
-      MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
-      OS << OSE.str();
+      MCObjectWriter::EncodeULEB128(AddrDelta, OS);
     }
     OS << char(dwarf::DW_LNS_extended_op);
     OS << char(1);
@@ -510,6 +507,12 @@ namespace {
       SectionStart(sectionStart) {
     }
 
+    /// EmitCompactUnwind - Emit the unwind information in a compact way. If
+    /// we're successful, return 'true'. Otherwise, return 'false' and it will
+    /// emit the normal CIE and FDE.
+    bool EmitCompactUnwind(MCStreamer &streamer,
+                           const MCDwarfFrameInfo &frame);
+
     const MCSymbol &EmitCIE(MCStreamer &streamer,
                             const MCSymbol *personality,
                             unsigned personalityEncoding,
@@ -623,6 +626,55 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
   }
 }
 
+/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
+/// successful, return 'true'. Otherwise, return 'false' and it will emit the
+/// normal CIE and FDE.
+bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+                                         const MCDwarfFrameInfo &Frame) {
+#if 1
+  return false;
+#else
+  MCContext &Context = Streamer.getContext();
+  const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+  Streamer.SwitchSection(TAI.getCompactUnwindSection());
+
+  unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI);
+  unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
+
+  // range-start range-length  compact-unwind-enc personality-func   lsda
+  //  _foo       LfooEnd-_foo  0x00000023          0                 0
+  //  _bar       LbarEnd-_bar  0x00000025         __gxx_personality  except_tab1
+  //
+  //   .section __LD,__compact_unwind,regular,debug
+  //
+  //   # compact unwind for _foo
+  //   .quad _foo
+  //   .set L1,LfooEnd-_foo
+  //   .long L1
+  //   .long 0x01010001
+  //   .quad 0
+  //   .quad 0
+  //
+  //   # compact unwind for _bar
+  //   .quad _bar
+  //   .set L2,LbarEnd-_bar
+  //   .long L2
+  //   .long 0x01020011
+  //   .quad __gxx_personality
+  //   .quad except_tab1
+
+  // Range Start
+  EmitSymbol(Streamer, *Frame.Begin, FDEEncoding);
+
+  // Range Length
+  const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
+                                              *Frame.End, 0);
+  Streamer.EmitAbsValue(Range, Size);
+
+  return true;
+#endif
+}
+
 const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
                                           const MCSymbol *personality,
                                           unsigned personalityEncoding,
@@ -849,7 +901,8 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &streamer,
   MCContext &context = streamer.getContext();
   const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
   const MCSection &section = isEH ?
-    *asmInfo.getEHFrameSection() : *asmInfo.getDwarfFrameSection();
+    *asmInfo.getEHFrameSection() :
+    *asmInfo.getDwarfFrameSection();
   streamer.SwitchSection(&section);
   MCSymbol *SectionStart = context.CreateTempSymbol();
   streamer.EmitLabel(SectionStart);
@@ -864,11 +917,17 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &streamer,
     CIEKey key(frame.Personality, frame.PersonalityEncoding,
                frame.LsdaEncoding);
     const MCSymbol *&cieStart = isEH ? CIEStarts[key] : DummyDebugKey;
+    if (isEH && asmInfo.getSupportsCompactUnwindInfo() &&
+        Emitter.EmitCompactUnwind(streamer, frame))
+      continue;
+
     if (!cieStart)
       cieStart = &Emitter.EmitCIE(streamer, frame.Personality,
                                   frame.PersonalityEncoding, frame.Lsda,
                                   frame.LsdaEncoding);
+
     fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame);
+
     if (i != n - 1)
       streamer.EmitLabel(fdeEnd);
   }
diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h
index db34d58..855e7e9 100644
--- a/lib/MC/MCELFStreamer.h
+++ b/lib/MC/MCELFStreamer.h
@@ -138,137 +138,3 @@ private:
 } // end llvm namespace
 
 #endif
-//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file assembles .s files and emits ELF .o object files.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MCELFSTREAMER_H
-#define LLVM_MC_MCELFSTREAMER_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSectionELF.h"
-
-namespace llvm {
-
-class MCELFStreamer : public MCObjectStreamer {
-public:
-  MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
-                  raw_ostream &OS, MCCodeEmitter *Emitter)
-    : MCObjectStreamer(Context, TAB, OS, Emitter) {}
-
-  ~MCELFStreamer() {}
-
-  /// @name MCStreamer Interface
-  /// @{
-
-  virtual void InitSections();
-  virtual void ChangeSection(const MCSection *Section);
-  virtual void EmitLabel(MCSymbol *Symbol);
-  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
-  virtual void EmitThumbFunc(MCSymbol *Func);
-  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
-  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
-  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
-  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                unsigned ByteAlignment);
-  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitCOFFSymbolType(int Type) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EndCOFFSymbolDef() {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
-     MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-     SD.setSize(Value);
-  }
-
-  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
-
-  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
-                            unsigned Size = 0, unsigned ByteAlignment = 0) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
-                              uint64_t Size, unsigned ByteAlignment = 0) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
-  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
-                                    unsigned ValueSize = 1,
-                                    unsigned MaxBytesToEmit = 0);
-  virtual void EmitCodeAlignment(unsigned ByteAlignment,
-                                 unsigned MaxBytesToEmit = 0);
-
-  virtual void EmitFileDirective(StringRef Filename);
-
-  virtual void Finish();
-
-private:
-  virtual void EmitInstToFragment(const MCInst &Inst);
-  virtual void EmitInstToData(const MCInst &Inst);
-
-  void fixSymbolsInTLSFixups(const MCExpr *expr);
-
-  struct LocalCommon {
-    MCSymbolData *SD;
-    uint64_t Size;
-    unsigned ByteAlignment;
-  };
-  std::vector<LocalCommon> LocalCommons;
-
-  SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
-  /// @}
-  void SetSection(StringRef Section, unsigned Type, unsigned Flags,
-                  SectionKind Kind) {
-    SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
-  }
-
-  void SetSectionData() {
-    SetSection(".data", ELF::SHT_PROGBITS,
-               ELF::SHF_WRITE |ELF::SHF_ALLOC,
-               SectionKind::getDataRel());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionText() {
-    SetSection(".text", ELF::SHT_PROGBITS,
-               ELF::SHF_EXECINSTR |
-               ELF::SHF_ALLOC, SectionKind::getText());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionBss() {
-    SetSection(".bss", ELF::SHT_NOBITS,
-               ELF::SHF_WRITE |
-               ELF::SHF_ALLOC, SectionKind::getBSS());
-    EmitCodeAlignment(4, 0);
-  }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index bbd6635..7b62db2 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -1146,7 +1146,7 @@ bool AsmParser::ParseStatement() {
     if (IDVal == ".weak_def_can_be_hidden")
       return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
 
-    if (IDVal == ".comm")
+    if (IDVal == ".comm" || IDVal == ".common")
       return ParseDirectiveComm(/*IsLocal=*/false);
     if (IDVal == ".lcomm")
       return ParseDirectiveComm(/*IsLocal=*/true);
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index f049b1c..0d80514 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -120,37 +120,8 @@ private:
     return SectionAddress.lookup(SD);
   }
   uint64_t getSymbolAddress(const MCSymbolData* SD,
-                            const MCAsmLayout &Layout) const {
-    const MCSymbol &S = SD->getSymbol();
-
-    // If this is a variable, then recursively evaluate now.
-    if (S.isVariable()) {
-      MCValue Target;
-      if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
-        report_fatal_error("unable to evaluate offset for variable '" +
-                           S.getName() + "'");
-
-      // Verify that any used symbols are defined.
-      if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
-        report_fatal_error("unable to evaluate offset to undefined symbol '" +
-                           Target.getSymA()->getSymbol().getName() + "'");
-      if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
-        report_fatal_error("unable to evaluate offset to undefined symbol '" +
-                           Target.getSymB()->getSymbol().getName() + "'");
-
-      uint64_t Address = Target.getConstant();
-      if (Target.getSymA())
-        Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
-                                      Target.getSymA()->getSymbol()), Layout);
-      if (Target.getSymB())
-        Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
-                                      Target.getSymB()->getSymbol()), Layout);
-      return Address;
-    }
+                            const MCAsmLayout &Layout) const;
 
-    return getSectionAddress(SD->getFragment()->getParent()) +
-      Layout.getSymbolOffset(SD);
-  }
   uint64_t getFragmentAddress(const MCFragment *Fragment,
                             const MCAsmLayout &Layout) const {
     return getSectionAddress(Fragment->getParent()) +
@@ -158,18 +129,7 @@ private:
   }
 
   uint64_t getPaddingSize(const MCSectionData *SD,
-                          const MCAsmLayout &Layout) const {
-    uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
-    unsigned Next = SD->getLayoutOrder() + 1;
-    if (Next >= Layout.getSectionOrder().size())
-      return 0;
-
-    const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
-    if (NextSD.getSection().isVirtualSection())
-      return 0;
-    return OffsetToAlignment(EndAddr, NextSD.getAlignment());
-  }
-
+                          const MCAsmLayout &Layout) const;
 public:
   MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS,
                    bool _IsLittleEndian)
@@ -188,33 +148,7 @@ public:
   /// @}
 
   void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
-                   bool SubsectionsViaSymbols) {
-    uint32_t Flags = 0;
-
-    if (SubsectionsViaSymbols)
-      Flags |= macho::HF_SubsectionsViaSymbols;
-
-    // struct mach_header (28 bytes) or
-    // struct mach_header_64 (32 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
-
-    Write32(TargetObjectWriter->getCPUType());
-    Write32(TargetObjectWriter->getCPUSubtype());
-
-    Write32(macho::HFT_Object);
-    Write32(NumLoadCommands);
-    Write32(LoadCommandsSize);
-    Write32(Flags);
-    if (is64Bit())
-      Write32(0); // reserved
-
-    assert(OS.tell() - Start ==
-           (is64Bit() ? macho::Header64Size : macho::Header32Size));
-  }
+                   bool SubsectionsViaSymbols);
 
   /// WriteSegmentLoadCommand - Write a segment load command.
   ///
@@ -223,105 +157,15 @@ public:
   void WriteSegmentLoadCommand(unsigned NumSections,
                                uint64_t VMSize,
                                uint64_t SectionDataStartOffset,
-                               uint64_t SectionDataSize) {
-    // struct segment_command (56 bytes) or
-    // struct segment_command_64 (72 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    unsigned SegmentLoadCommandSize =
-      is64Bit() ? macho::SegmentLoadCommand64Size:
-      macho::SegmentLoadCommand32Size;
-    Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
-    Write32(SegmentLoadCommandSize +
-            NumSections * (is64Bit() ? macho::Section64Size :
-                           macho::Section32Size));
-
-    WriteBytes("", 16);
-    if (is64Bit()) {
-      Write64(0); // vmaddr
-      Write64(VMSize); // vmsize
-      Write64(SectionDataStartOffset); // file offset
-      Write64(SectionDataSize); // file size
-    } else {
-      Write32(0); // vmaddr
-      Write32(VMSize); // vmsize
-      Write32(SectionDataStartOffset); // file offset
-      Write32(SectionDataSize); // file size
-    }
-    Write32(0x7); // maxprot
-    Write32(0x7); // initprot
-    Write32(NumSections);
-    Write32(0); // flags
-
-    assert(OS.tell() - Start == SegmentLoadCommandSize);
-  }
+                               uint64_t SectionDataSize);
 
   void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
                     const MCSectionData &SD, uint64_t FileOffset,
-                    uint64_t RelocationsStart, unsigned NumRelocations) {
-    uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
-
-    // The offset is unused for virtual sections.
-    if (SD.getSection().isVirtualSection()) {
-      assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
-      FileOffset = 0;
-    }
-
-    // struct section (68 bytes) or
-    // struct section_64 (80 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
-    WriteBytes(Section.getSectionName(), 16);
-    WriteBytes(Section.getSegmentName(), 16);
-    if (is64Bit()) {
-      Write64(getSectionAddress(&SD)); // address
-      Write64(SectionSize); // size
-    } else {
-      Write32(getSectionAddress(&SD)); // address
-      Write32(SectionSize); // size
-    }
-    Write32(FileOffset);
-
-    unsigned Flags = Section.getTypeAndAttributes();
-    if (SD.hasInstructions())
-      Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
-
-    assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
-    Write32(Log2_32(SD.getAlignment()));
-    Write32(NumRelocations ? RelocationsStart : 0);
-    Write32(NumRelocations);
-    Write32(Flags);
-    Write32(IndirectSymBase.lookup(&SD)); // reserved1
-    Write32(Section.getStubSize()); // reserved2
-    if (is64Bit())
-      Write32(0); // reserved3
-
-    assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
-           macho::Section32Size));
-  }
+                    uint64_t RelocationsStart, unsigned NumRelocations);
 
   void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
                               uint32_t StringTableOffset,
-                              uint32_t StringTableSize) {
-    // struct symtab_command (24 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    Write32(macho::LCT_Symtab);
-    Write32(macho::SymtabLoadCommandSize);
-    Write32(SymbolOffset);
-    Write32(NumSymbols);
-    Write32(StringTableOffset);
-    Write32(StringTableSize);
-
-    assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
-  }
+                              uint32_t StringTableSize);
 
   void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
                                 uint32_t NumLocalSymbols,
@@ -330,100 +174,9 @@ public:
                                 uint32_t FirstUndefinedSymbol,
                                 uint32_t NumUndefinedSymbols,
                                 uint32_t IndirectSymbolOffset,
-                                uint32_t NumIndirectSymbols) {
-    // struct dysymtab_command (80 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    Write32(macho::LCT_Dysymtab);
-    Write32(macho::DysymtabLoadCommandSize);
-    Write32(FirstLocalSymbol);
-    Write32(NumLocalSymbols);
-    Write32(FirstExternalSymbol);
-    Write32(NumExternalSymbols);
-    Write32(FirstUndefinedSymbol);
-    Write32(NumUndefinedSymbols);
-    Write32(0); // tocoff
-    Write32(0); // ntoc
-    Write32(0); // modtaboff
-    Write32(0); // nmodtab
-    Write32(0); // extrefsymoff
-    Write32(0); // nextrefsyms
-    Write32(IndirectSymbolOffset);
-    Write32(NumIndirectSymbols);
-    Write32(0); // extreloff
-    Write32(0); // nextrel
-    Write32(0); // locreloff
-    Write32(0); // nlocrel
-
-    assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
-  }
-
-  void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) {
-    MCSymbolData &Data = *MSD.SymbolData;
-    const MCSymbol &Symbol = Data.getSymbol();
-    uint8_t Type = 0;
-    uint16_t Flags = Data.getFlags();
-    uint32_t Address = 0;
-
-    // Set the N_TYPE bits. See <mach-o/nlist.h>.
-    //
-    // FIXME: Are the prebound or indirect fields possible here?
-    if (Symbol.isUndefined())
-      Type = macho::STT_Undefined;
-    else if (Symbol.isAbsolute())
-      Type = macho::STT_Absolute;
-    else
-      Type = macho::STT_Section;
-
-    // FIXME: Set STAB bits.
-
-    if (Data.isPrivateExtern())
-      Type |= macho::STF_PrivateExtern;
-
-    // Set external bit.
-    if (Data.isExternal() || Symbol.isUndefined())
-      Type |= macho::STF_External;
-
-    // Compute the symbol address.
-    if (Symbol.isDefined()) {
-      if (Symbol.isAbsolute()) {
-        Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
-      } else {
-        Address = getSymbolAddress(&Data, Layout);
-      }
-    } else if (Data.isCommon()) {
-      // Common symbols are encoded with the size in the address
-      // field, and their alignment in the flags.
-      Address = Data.getCommonSize();
-
-      // Common alignment is packed into the 'desc' bits.
-      if (unsigned Align = Data.getCommonAlignment()) {
-        unsigned Log2Size = Log2_32(Align);
-        assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
-        if (Log2Size > 15)
-          report_fatal_error("invalid 'common' alignment '" +
-                            Twine(Align) + "'");
-        // FIXME: Keep this mask with the SymbolFlags enumeration.
-        Flags = (Flags & 0xF0FF) | (Log2Size << 8);
-      }
-    }
-
-    // struct nlist (12 bytes)
-
-    Write32(MSD.StringIndex);
-    Write8(Type);
-    Write8(MSD.SectionIndex);
+                                uint32_t NumIndirectSymbols);
 
-    // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
-    // value.
-    Write16(Flags);
-    if (is64Bit())
-      Write64(Address);
-    else
-      Write32(Address);
-  }
+  void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout);
 
   // FIXME: We really need to improve the relocation validation. Basically, we
   // want to implement a separate computation which evaluates the relocation
@@ -446,650 +199,454 @@ public:
   void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
                               const MCFragment *Fragment,
                               const MCFixup &Fixup, MCValue Target,
-                              uint64_t &FixedValue) {
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-
-    // See <reloc.h>.
-    uint32_t FixupOffset =
-      Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
-    uint32_t FixupAddress =
-      getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
-    int64_t Value = 0;
-    unsigned Index = 0;
-    unsigned IsExtern = 0;
-    unsigned Type = 0;
-
-    Value = Target.getConstant();
-
-    if (IsPCRel) {
-      // Compensate for the relocation offset, Darwin x86_64 relocations only
-      // have the addend and appear to have attempted to define it to be the
-      // actual expression addend without the PCrel bias. However, instructions
-      // with data following the relocation are not accommodated for (see comment
-      // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
-      Value += 1LL << Log2Size;
-    }
-
-    if (Target.isAbsolute()) { // constant
-      // SymbolNum of 0 indicates the absolute section.
-      Type = macho::RIT_X86_64_Unsigned;
-      Index = 0;
-
-      // FIXME: I believe this is broken, I don't think the linker can
-      // understand it. I think it would require a local relocation, but I'm not
-      // sure if that would work either. The official way to get an absolute
-      // PCrel relocation is to use an absolute symbol (which we don't support
-      // yet).
-      if (IsPCRel) {
-        IsExtern = 1;
-        Type = macho::RIT_X86_64_Branch;
-      }
-    } else if (Target.getSymB()) { // A - B + constant
-      const MCSymbol *A = &Target.getSymA()->getSymbol();
-      MCSymbolData &A_SD = Asm.getSymbolData(*A);
-      const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
-
-      const MCSymbol *B = &Target.getSymB()->getSymbol();
-      MCSymbolData &B_SD = Asm.getSymbolData(*B);
-      const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
-
-      // Neither symbol can be modified.
-      if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
-          Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
-        report_fatal_error("unsupported relocation of modified symbol");
-
-      // We don't support PCrel relocations of differences. Darwin 'as' doesn't
-      // implement most of these correctly.
-      if (IsPCRel)
-        report_fatal_error("unsupported pc-relative relocation of difference");
-
-      // The support for the situation where one or both of the symbols would
-      // require a local relocation is handled just like if the symbols were
-      // external.  This is certainly used in the case of debug sections where
-      // the section has only temporary symbols and thus the symbols don't have
-      // base symbols.  This is encoded using the section ordinal and
-      // non-extern relocation entries.
-
-      // Darwin 'as' doesn't emit correct relocations for this (it ends up with
-      // a single SIGNED relocation); reject it for now.  Except the case where
-      // both symbols don't have a base, equal but both NULL.
-      if (A_Base == B_Base && A_Base)
-        report_fatal_error("unsupported relocation with identical base");
-
-      Value += getSymbolAddress(&A_SD, Layout) -
-        (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout));
-      Value -= getSymbolAddress(&B_SD, Layout) -
-        (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout));
-
-      if (A_Base) {
-        Index = A_Base->getIndex();
-        IsExtern = 1;
-      }
-      else {
-        Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
-        IsExtern = 0;
-      }
-      Type = macho::RIT_X86_64_Unsigned;
-
-      macho::RelocationEntry MRE;
-      MRE.Word0 = FixupOffset;
-      MRE.Word1 = ((Index     <<  0) |
-                   (IsPCRel   << 24) |
-                   (Log2Size  << 25) |
-                   (IsExtern  << 27) |
-                   (Type      << 28));
-      Relocations[Fragment->getParent()].push_back(MRE);
-
-      if (B_Base) {
-        Index = B_Base->getIndex();
-        IsExtern = 1;
-      }
-      else {
-        Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
-        IsExtern = 0;
-      }
-      Type = macho::RIT_X86_64_Subtractor;
-    } else {
-      const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
-      MCSymbolData &SD = Asm.getSymbolData(*Symbol);
-      const MCSymbolData *Base = Asm.getAtom(&SD);
-
-      // Relocations inside debug sections always use local relocations when
-      // possible. This seems to be done because the debugger doesn't fully
-      // understand x86_64 relocation entries, and expects to find values that
-      // have already been fixed up.
-      if (Symbol->isInSection()) {
-        const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
-          Fragment->getParent()->getSection());
-        if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
-          Base = 0;
-      }
-
-      // x86_64 almost always uses external relocations, except when there is no
-      // symbol to use as a base address (a local symbol with no preceding
-      // non-local symbol).
-      if (Base) {
-        Index = Base->getIndex();
-        IsExtern = 1;
-
-        // Add the local offset, if needed.
-        if (Base != &SD)
-          Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
-      } else if (Symbol->isInSection() && !Symbol->isVariable()) {
-        // The index is the section ordinal (1-based).
-        Index = SD.getFragment()->getParent()->getOrdinal() + 1;
-        IsExtern = 0;
-        Value += getSymbolAddress(&SD, Layout);
-
-        if (IsPCRel)
-          Value -= FixupAddress + (1 << Log2Size);
-      } else if (Symbol->isVariable()) {
-        const MCExpr *Value = Symbol->getVariableValue();
-        int64_t Res;
-        bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress);
-        if (isAbs) {
-          FixedValue = Res;
-          return;
-        } else {
-          report_fatal_error("unsupported relocation of variable '" +
-                             Symbol->getName() + "'");
-        }
-      } else {
-        report_fatal_error("unsupported relocation of undefined symbol '" +
-                           Symbol->getName() + "'");
-      }
-
-      MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
-      if (IsPCRel) {
-        if (IsRIPRel) {
-          if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
-            // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
-            // rewrite the movq to an leaq at link time if the symbol ends up in
-            // the same linkage unit.
-            if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
-              Type = macho::RIT_X86_64_GOTLoad;
-            else
-              Type = macho::RIT_X86_64_GOT;
-          }  else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
-            Type = macho::RIT_X86_64_TLV;
-          }  else if (Modifier != MCSymbolRefExpr::VK_None) {
-            report_fatal_error("unsupported symbol modifier in relocation");
-          } else {
-            Type = macho::RIT_X86_64_Signed;
-
-            // The Darwin x86_64 relocation format has a problem where it cannot
-            // encode an address (L<foo> + <constant>) which is outside the atom
-            // containing L<foo>. Generally, this shouldn't occur but it does
-            // happen when we have a RIPrel instruction with data following the
-            // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
-            // adjustment Darwin x86_64 uses, the offset is still negative and
-            // the linker has no way to recognize this.
-            //
-            // To work around this, Darwin uses several special relocation types
-            // to indicate the offsets. However, the specification or
-            // implementation of these seems to also be incomplete; they should
-            // adjust the addend as well based on the actual encoded instruction
-            // (the additional bias), but instead appear to just look at the
-            // final offset.
-            switch (-(Target.getConstant() + (1LL << Log2Size))) {
-            case 1: Type = macho::RIT_X86_64_Signed1; break;
-            case 2: Type = macho::RIT_X86_64_Signed2; break;
-            case 4: Type = macho::RIT_X86_64_Signed4; break;
-            }
-          }
-        } else {
-          if (Modifier != MCSymbolRefExpr::VK_None)
-            report_fatal_error("unsupported symbol modifier in branch "
-                              "relocation");
-
-          Type = macho::RIT_X86_64_Branch;
-        }
-      } else {
-        if (Modifier == MCSymbolRefExpr::VK_GOT) {
-          Type = macho::RIT_X86_64_GOT;
-        } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
-          // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
-          // which case all we do is set the PCrel bit in the relocation entry;
-          // this is used with exception handling, for example. The source is
-          // required to include any necessary offset directly.
-          Type = macho::RIT_X86_64_GOT;
-          IsPCRel = 1;
-        } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
-          report_fatal_error("TLVP symbol modifier should have been rip-rel");
-        } else if (Modifier != MCSymbolRefExpr::VK_None)
-          report_fatal_error("unsupported symbol modifier in relocation");
-        else
-          Type = macho::RIT_X86_64_Unsigned;
-      }
-    }
-
-    // x86_64 always writes custom values into the fixups.
-    FixedValue = Value;
-
-    // struct relocation_info (8 bytes)
-    macho::RelocationEntry MRE;
-    MRE.Word0 = FixupOffset;
-    MRE.Word1 = ((Index     <<  0) |
-                 (IsPCRel   << 24) |
-                 (Log2Size  << 25) |
-                 (IsExtern  << 27) |
-                 (Type      << 28));
-    Relocations[Fragment->getParent()].push_back(MRE);
-  }
+                              uint64_t &FixedValue);
 
   void RecordScatteredRelocation(const MCAssembler &Asm,
                                  const MCAsmLayout &Layout,
                                  const MCFragment *Fragment,
                                  const MCFixup &Fixup, MCValue Target,
                                  unsigned Log2Size,
-                                 uint64_t &FixedValue) {
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Type = macho::RIT_Vanilla;
-
-    // See <reloc.h>.
-    const MCSymbol *A = &Target.getSymA()->getSymbol();
-    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
-    if (!A_SD->getFragment())
-      report_fatal_error("symbol '" + A->getName() +
-                        "' can not be undefined in a subtraction expression");
-
-    uint32_t Value = getSymbolAddress(A_SD, Layout);
-    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
-    FixedValue += SecAddr;
-    uint32_t Value2 = 0;
-
-    if (const MCSymbolRefExpr *B = Target.getSymB()) {
-      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
-      if (!B_SD->getFragment())
-        report_fatal_error("symbol '" + B->getSymbol().getName() +
-                          "' can not be undefined in a subtraction expression");
-
-      // Select the appropriate difference relocation type.
-      //
-      // Note that there is no longer any semantic difference between these two
-      // relocation types from the linkers point of view, this is done solely
-      // for pedantic compatibility with 'as'.
-      Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
-        (unsigned)macho::RIT_Generic_LocalDifference;
-      Value2 = getSymbolAddress(B_SD, Layout);
-      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
-    }
-
-    // Relocations are written out in reverse order, so the PAIR comes first.
-    if (Type == macho::RIT_Difference ||
-        Type == macho::RIT_Generic_LocalDifference) {
-      macho::RelocationEntry MRE;
-      MRE.Word0 = ((0         <<  0) |
-                   (macho::RIT_Pair  << 24) |
-                   (Log2Size  << 28) |
-                   (IsPCRel   << 30) |
-                   macho::RF_Scattered);
-      MRE.Word1 = Value2;
-      Relocations[Fragment->getParent()].push_back(MRE);
-    }
-
-    macho::RelocationEntry MRE;
-    MRE.Word0 = ((FixupOffset <<  0) |
-                 (Type        << 24) |
-                 (Log2Size    << 28) |
-                 (IsPCRel     << 30) |
-                 macho::RF_Scattered);
-    MRE.Word1 = Value;
-    Relocations[Fragment->getParent()].push_back(MRE);
-  }
+                                 uint64_t &FixedValue);
 
   void RecordARMScatteredRelocation(const MCAssembler &Asm,
                                     const MCAsmLayout &Layout,
                                     const MCFragment *Fragment,
                                     const MCFixup &Fixup, MCValue Target,
                                     unsigned Log2Size,
-                                    uint64_t &FixedValue) {
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Type = macho::RIT_Vanilla;
+                                    uint64_t &FixedValue);
 
-    // See <reloc.h>.
-    const MCSymbol *A = &Target.getSymA()->getSymbol();
-    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+  void RecordARMMovwMovtRelocation(const MCAssembler &Asm,
+                                   const MCAsmLayout &Layout,
+                                   const MCFragment *Fragment,
+                                   const MCFixup &Fixup, MCValue Target,
+                                   uint64_t &FixedValue);
 
-    if (!A_SD->getFragment())
-      report_fatal_error("symbol '" + A->getName() +
-                        "' can not be undefined in a subtraction expression");
+  void RecordTLVPRelocation(const MCAssembler &Asm,
+                            const MCAsmLayout &Layout,
+                            const MCFragment *Fragment,
+                            const MCFixup &Fixup, MCValue Target,
+                            uint64_t &FixedValue);
 
-    uint32_t Value = getSymbolAddress(A_SD, Layout);
-    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
-    FixedValue += SecAddr;
-    uint32_t Value2 = 0;
+  static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+                                       unsigned &Log2Size);
 
-    if (const MCSymbolRefExpr *B = Target.getSymB()) {
-      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+  void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                           const MCFragment *Fragment, const MCFixup &Fixup,
+                           MCValue Target, uint64_t &FixedValue);
 
-      if (!B_SD->getFragment())
-        report_fatal_error("symbol '" + B->getSymbol().getName() +
-                          "' can not be undefined in a subtraction expression");
+  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                        const MCFragment *Fragment, const MCFixup &Fixup,
+                        MCValue Target, uint64_t &FixedValue);
 
-      // Select the appropriate difference relocation type.
-      Type = macho::RIT_Difference;
-      Value2 = getSymbolAddress(B_SD, Layout);
-      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
-    }
+  void BindIndirectSymbols(MCAssembler &Asm);
 
-    // Relocations are written out in reverse order, so the PAIR comes first.
-    if (Type == macho::RIT_Difference ||
-        Type == macho::RIT_Generic_LocalDifference) {
-      macho::RelocationEntry MRE;
-      MRE.Word0 = ((0         <<  0) |
-                   (macho::RIT_Pair  << 24) |
-                   (Log2Size  << 28) |
-                   (IsPCRel   << 30) |
-                   macho::RF_Scattered);
-      MRE.Word1 = Value2;
-      Relocations[Fragment->getParent()].push_back(MRE);
-    }
+  /// ComputeSymbolTable - Compute the symbol table data
+  ///
+  /// \param StringTable [out] - The string table data.
+  /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+  /// string table.
+  void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+                          std::vector<MachSymbolData> &LocalSymbolData,
+                          std::vector<MachSymbolData> &ExternalSymbolData,
+                          std::vector<MachSymbolData> &UndefinedSymbolData);
 
-    macho::RelocationEntry MRE;
-    MRE.Word0 = ((FixupOffset <<  0) |
-                 (Type        << 24) |
-                 (Log2Size    << 28) |
-                 (IsPCRel     << 30) |
-                 macho::RF_Scattered);
-    MRE.Word1 = Value;
-    Relocations[Fragment->getParent()].push_back(MRE);
-  }
+  void computeSectionAddresses(const MCAssembler &Asm,
+                               const MCAsmLayout &Layout);
 
-  void RecordARMMovwMovtRelocation(const MCAssembler &Asm,
-                                   const MCAsmLayout &Layout,
-                                   const MCFragment *Fragment,
-                                   const MCFixup &Fixup, MCValue Target,
-                                   uint64_t &FixedValue) {
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Type = macho::RIT_ARM_Half;
+  void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
 
-    // See <reloc.h>.
-    const MCSymbol *A = &Target.getSymA()->getSymbol();
-    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+  virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                      const MCSymbolData &DataA,
+                                                      const MCFragment &FB,
+                                                      bool InSet,
+                                                      bool IsPCRel) const;
 
-    if (!A_SD->getFragment())
-      report_fatal_error("symbol '" + A->getName() +
-                        "' can not be undefined in a subtraction expression");
+  void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+};
 
-    uint32_t Value = getSymbolAddress(A_SD, Layout);
-    uint32_t Value2 = 0;
-    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
-    FixedValue += SecAddr;
+} // end anonymous namespace
 
-    if (const MCSymbolRefExpr *B = Target.getSymB()) {
-      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
+                                            const MCAsmLayout &Layout) const {
+  const MCSymbol &S = SD->getSymbol();
 
-      if (!B_SD->getFragment())
-        report_fatal_error("symbol '" + B->getSymbol().getName() +
-                          "' can not be undefined in a subtraction expression");
+  // If this is a variable, then recursively evaluate now.
+  if (S.isVariable()) {
+    MCValue Target;
+    if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
+      report_fatal_error("unable to evaluate offset for variable '" +
+                         S.getName() + "'");
 
-      // Select the appropriate difference relocation type.
-      Type = macho::RIT_ARM_HalfDifference;
-      Value2 = getSymbolAddress(B_SD, Layout);
-      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
-    }
+    // Verify that any used symbols are defined.
+    if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+      report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                         Target.getSymA()->getSymbol().getName() + "'");
+    if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+      report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                         Target.getSymB()->getSymbol().getName() + "'");
 
-    // Relocations are written out in reverse order, so the PAIR comes first.
-    // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
-    //
-    // For these two r_type relocations they always have a pair following them
-    // and the r_length bits are used differently.  The encoding of the
-    // r_length is as follows:
-    // low bit of r_length:
-    //  0 - :lower16: for movw instructions
-    //  1 - :upper16: for movt instructions
-    // high bit of r_length:
-    //  0 - arm instructions
-    //  1 - thumb instructions
-    // the other half of the relocated expression is in the following pair
-    // relocation entry in the the low 16 bits of r_address field.
-    unsigned ThumbBit = 0;
-    unsigned MovtBit = 0;
-    switch ((unsigned)Fixup.getKind()) {
-    default: break;
-    case ARM::fixup_arm_movt_hi16:
-    case ARM::fixup_arm_movt_hi16_pcrel:
-      MovtBit = 1;
-      break;
-    case ARM::fixup_t2_movt_hi16:
-    case ARM::fixup_t2_movt_hi16_pcrel:
-      MovtBit = 1;
-      // Fallthrough
-    case ARM::fixup_t2_movw_lo16:
-    case ARM::fixup_t2_movw_lo16_pcrel:
-      ThumbBit = 1;
-      break;
-    }
+    uint64_t Address = Target.getConstant();
+    if (Target.getSymA())
+      Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+                                    Target.getSymA()->getSymbol()), Layout);
+    if (Target.getSymB())
+      Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+                                    Target.getSymB()->getSymbol()), Layout);
+    return Address;
+  }
 
+  return getSectionAddress(SD->getFragment()->getParent()) +
+    Layout.getSymbolOffset(SD);
+}
 
-    if (Type == macho::RIT_ARM_HalfDifference) {
-      uint32_t OtherHalf = MovtBit
-        ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD,
+                                          const MCAsmLayout &Layout) const {
+  uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+  unsigned Next = SD->getLayoutOrder() + 1;
+  if (Next >= Layout.getSectionOrder().size())
+    return 0;
+
+  const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+  if (NextSD.getSection().isVirtualSection())
+    return 0;
+  return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+}
 
-      macho::RelocationEntry MRE;
-      MRE.Word0 = ((OtherHalf       <<  0) |
-                   (macho::RIT_Pair << 24) |
-                   (MovtBit         << 28) |
-                   (ThumbBit        << 29) |
-                   (IsPCRel         << 30) |
-                   macho::RF_Scattered);
-      MRE.Word1 = Value2;
-      Relocations[Fragment->getParent()].push_back(MRE);
-    }
+void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
+                                   unsigned LoadCommandsSize,
+                                   bool SubsectionsViaSymbols) {
+  uint32_t Flags = 0;
 
-    macho::RelocationEntry MRE;
-    MRE.Word0 = ((FixupOffset <<  0) |
-                 (Type        << 24) |
-                 (MovtBit     << 28) |
-                 (ThumbBit    << 29) |
-                 (IsPCRel     << 30) |
-                 macho::RF_Scattered);
-    MRE.Word1 = Value;
-    Relocations[Fragment->getParent()].push_back(MRE);
+  if (SubsectionsViaSymbols)
+    Flags |= macho::HF_SubsectionsViaSymbols;
+
+  // struct mach_header (28 bytes) or
+  // struct mach_header_64 (32 bytes)
+
+  uint64_t Start = OS.tell();
+  (void) Start;
+
+  Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
+
+  Write32(TargetObjectWriter->getCPUType());
+  Write32(TargetObjectWriter->getCPUSubtype());
+
+  Write32(macho::HFT_Object);
+  Write32(NumLoadCommands);
+  Write32(LoadCommandsSize);
+  Write32(Flags);
+  if (is64Bit())
+    Write32(0); // reserved
+
+  assert(OS.tell() - Start ==
+         (is64Bit() ? macho::Header64Size : macho::Header32Size));
+}
+
+/// WriteSegmentLoadCommand - Write a segment load command.
+///
+/// \arg NumSections - The number of sections in this segment.
+/// \arg SectionDataSize - The total size of the sections.
+void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
+                                               uint64_t VMSize,
+                                               uint64_t SectionDataStartOffset,
+                                               uint64_t SectionDataSize) {
+  // struct segment_command (56 bytes) or
+  // struct segment_command_64 (72 bytes)
+
+  uint64_t Start = OS.tell();
+  (void) Start;
+
+  unsigned SegmentLoadCommandSize =
+    is64Bit() ? macho::SegmentLoadCommand64Size:
+    macho::SegmentLoadCommand32Size;
+  Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
+  Write32(SegmentLoadCommandSize +
+          NumSections * (is64Bit() ? macho::Section64Size :
+                         macho::Section32Size));
+
+  WriteBytes("", 16);
+  if (is64Bit()) {
+    Write64(0); // vmaddr
+    Write64(VMSize); // vmsize
+    Write64(SectionDataStartOffset); // file offset
+    Write64(SectionDataSize); // file size
+  } else {
+    Write32(0); // vmaddr
+    Write32(VMSize); // vmsize
+    Write32(SectionDataStartOffset); // file offset
+    Write32(SectionDataSize); // file size
   }
+  Write32(0x7); // maxprot
+  Write32(0x7); // initprot
+  Write32(NumSections);
+  Write32(0); // flags
 
-  void RecordTLVPRelocation(const MCAssembler &Asm,
-                            const MCAsmLayout &Layout,
-                            const MCFragment *Fragment,
-                            const MCFixup &Fixup, MCValue Target,
-                            uint64_t &FixedValue) {
-    assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
-           !is64Bit() &&
-           "Should only be called with a 32-bit TLVP relocation!");
-
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-    uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = 0;
-
-    // Get the symbol data.
-    MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
-    unsigned Index = SD_A->getIndex();
-
-    // We're only going to have a second symbol in pic mode and it'll be a
-    // subtraction from the picbase. For 32-bit pic the addend is the difference
-    // between the picbase and the next address.  For 32-bit static the addend
-    // is zero.
-    if (Target.getSymB()) {
-      // If this is a subtraction then we're pcrel.
-      uint32_t FixupAddress =
-        getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
-      MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
-      IsPCRel = 1;
-      FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) +
-                    Target.getConstant());
-      FixedValue += 1ULL << Log2Size;
-    } else {
-      FixedValue = 0;
-    }
+  assert(OS.tell() - Start == SegmentLoadCommandSize);
+}
 
-    // struct relocation_info (8 bytes)
-    macho::RelocationEntry MRE;
-    MRE.Word0 = Value;
-    MRE.Word1 = ((Index                  <<  0) |
-                 (IsPCRel                << 24) |
-                 (Log2Size               << 25) |
-                 (1                      << 27) | // Extern
-                 (macho::RIT_Generic_TLV << 28)); // Type
-    Relocations[Fragment->getParent()].push_back(MRE);
+void MachObjectWriter::WriteSection(const MCAssembler &Asm,
+                                    const MCAsmLayout &Layout,
+                                    const MCSectionData &SD,
+                                    uint64_t FileOffset,
+                                    uint64_t RelocationsStart,
+                                    unsigned NumRelocations) {
+  uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+
+  // The offset is unused for virtual sections.
+  if (SD.getSection().isVirtualSection()) {
+    assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
+    FileOffset = 0;
   }
 
-  static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
-                                       unsigned &Log2Size) {
-    RelocType = unsigned(macho::RIT_Vanilla);
-    Log2Size = ~0U;
+  // struct section (68 bytes) or
+  // struct section_64 (80 bytes)
+
+  uint64_t Start = OS.tell();
+  (void) Start;
+
+  const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
+  WriteBytes(Section.getSectionName(), 16);
+  WriteBytes(Section.getSegmentName(), 16);
+  if (is64Bit()) {
+    Write64(getSectionAddress(&SD)); // address
+    Write64(SectionSize); // size
+  } else {
+    Write32(getSectionAddress(&SD)); // address
+    Write32(SectionSize); // size
+  }
+  Write32(FileOffset);
+
+  unsigned Flags = Section.getTypeAndAttributes();
+  if (SD.hasInstructions())
+    Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
+
+  assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+  Write32(Log2_32(SD.getAlignment()));
+  Write32(NumRelocations ? RelocationsStart : 0);
+  Write32(NumRelocations);
+  Write32(Flags);
+  Write32(IndirectSymBase.lookup(&SD)); // reserved1
+  Write32(Section.getStubSize()); // reserved2
+  if (is64Bit())
+    Write32(0); // reserved3
+
+  assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
+                               macho::Section32Size));
+}
 
-    switch (Kind) {
-    default:
-      return false;
+void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
+                                              uint32_t NumSymbols,
+                                              uint32_t StringTableOffset,
+                                              uint32_t StringTableSize) {
+  // struct symtab_command (24 bytes)
 
-    case FK_Data_1:
-      Log2Size = llvm::Log2_32(1);
-      return true;
-    case FK_Data_2:
-      Log2Size = llvm::Log2_32(2);
-      return true;
-    case FK_Data_4:
-      Log2Size = llvm::Log2_32(4);
-      return true;
-    case FK_Data_8:
-      Log2Size = llvm::Log2_32(8);
-      return true;
+  uint64_t Start = OS.tell();
+  (void) Start;
 
-      // Handle 24-bit branch kinds.
-    case ARM::fixup_arm_ldst_pcrel_12:
-    case ARM::fixup_arm_pcrel_10:
-    case ARM::fixup_arm_adr_pcrel_12:
-    case ARM::fixup_arm_condbranch:
-    case ARM::fixup_arm_uncondbranch:
-      RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
-      // Report as 'long', even though that is not quite accurate.
-      Log2Size = llvm::Log2_32(4);
-      return true;
+  Write32(macho::LCT_Symtab);
+  Write32(macho::SymtabLoadCommandSize);
+  Write32(SymbolOffset);
+  Write32(NumSymbols);
+  Write32(StringTableOffset);
+  Write32(StringTableSize);
 
-      // Handle Thumb branches.
-    case ARM::fixup_arm_thumb_br:
-      RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
-      Log2Size = llvm::Log2_32(2);
-      return true;
-      
-    case ARM::fixup_arm_thumb_bl:
-    case ARM::fixup_arm_thumb_blx:
-      RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
-      Log2Size = llvm::Log2_32(4);
-      return true;
+  assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
+}
 
-    case ARM::fixup_arm_movt_hi16:
-    case ARM::fixup_arm_movt_hi16_pcrel:
-    case ARM::fixup_t2_movt_hi16:
-    case ARM::fixup_t2_movt_hi16_pcrel:
-      RelocType = unsigned(macho::RIT_ARM_HalfDifference);
-      // Report as 'long', even though that is not quite accurate.
-      Log2Size = llvm::Log2_32(4);
-      return true;
+void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+                                                uint32_t NumLocalSymbols,
+                                                uint32_t FirstExternalSymbol,
+                                                uint32_t NumExternalSymbols,
+                                                uint32_t FirstUndefinedSymbol,
+                                                uint32_t NumUndefinedSymbols,
+                                                uint32_t IndirectSymbolOffset,
+                                                uint32_t NumIndirectSymbols) {
+  // struct dysymtab_command (80 bytes)
+
+  uint64_t Start = OS.tell();
+  (void) Start;
+
+  Write32(macho::LCT_Dysymtab);
+  Write32(macho::DysymtabLoadCommandSize);
+  Write32(FirstLocalSymbol);
+  Write32(NumLocalSymbols);
+  Write32(FirstExternalSymbol);
+  Write32(NumExternalSymbols);
+  Write32(FirstUndefinedSymbol);
+  Write32(NumUndefinedSymbols);
+  Write32(0); // tocoff
+  Write32(0); // ntoc
+  Write32(0); // modtaboff
+  Write32(0); // nmodtab
+  Write32(0); // extrefsymoff
+  Write32(0); // nextrefsyms
+  Write32(IndirectSymbolOffset);
+  Write32(NumIndirectSymbols);
+  Write32(0); // extreloff
+  Write32(0); // nextrel
+  Write32(0); // locreloff
+  Write32(0); // nlocrel
+
+  assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
+}
 
-    case ARM::fixup_arm_movw_lo16:
-    case ARM::fixup_arm_movw_lo16_pcrel:
-    case ARM::fixup_t2_movw_lo16:
-    case ARM::fixup_t2_movw_lo16_pcrel:
-      RelocType = unsigned(macho::RIT_ARM_Half);
-      // Report as 'long', even though that is not quite accurate.
-      Log2Size = llvm::Log2_32(4);
-      return true;
+void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
+                                  const MCAsmLayout &Layout) {
+  MCSymbolData &Data = *MSD.SymbolData;
+  const MCSymbol &Symbol = Data.getSymbol();
+  uint8_t Type = 0;
+  uint16_t Flags = Data.getFlags();
+  uint32_t Address = 0;
+
+  // Set the N_TYPE bits. See <mach-o/nlist.h>.
+  //
+  // FIXME: Are the prebound or indirect fields possible here?
+  if (Symbol.isUndefined())
+    Type = macho::STT_Undefined;
+  else if (Symbol.isAbsolute())
+    Type = macho::STT_Absolute;
+  else
+    Type = macho::STT_Section;
+
+  // FIXME: Set STAB bits.
+
+  if (Data.isPrivateExtern())
+    Type |= macho::STF_PrivateExtern;
+
+  // Set external bit.
+  if (Data.isExternal() || Symbol.isUndefined())
+    Type |= macho::STF_External;
+
+  // Compute the symbol address.
+  if (Symbol.isDefined()) {
+    if (Symbol.isAbsolute()) {
+      Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
+    } else {
+      Address = getSymbolAddress(&Data, Layout);
     }
-  }
-  void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                           const MCFragment *Fragment, const MCFixup &Fixup,
-                           MCValue Target, uint64_t &FixedValue) {
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Log2Size;
-    unsigned RelocType = macho::RIT_Vanilla;
-    if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
-      report_fatal_error("unknown ARM fixup kind!");
-      return;
+  } else if (Data.isCommon()) {
+    // Common symbols are encoded with the size in the address
+    // field, and their alignment in the flags.
+    Address = Data.getCommonSize();
+
+    // Common alignment is packed into the 'desc' bits.
+    if (unsigned Align = Data.getCommonAlignment()) {
+      unsigned Log2Size = Log2_32(Align);
+      assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+      if (Log2Size > 15)
+        report_fatal_error("invalid 'common' alignment '" +
+                           Twine(Align) + "'");
+      // FIXME: Keep this mask with the SymbolFlags enumeration.
+      Flags = (Flags & 0xF0FF) | (Log2Size << 8);
     }
+  }
 
-    // If this is a difference or a defined symbol plus an offset, then we need
-    // a scattered relocation entry.  Differences always require scattered
-    // relocations.
-    if (Target.getSymB()) {
-      if (RelocType == macho::RIT_ARM_Half ||
-          RelocType == macho::RIT_ARM_HalfDifference)
-        return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup,
-                                           Target, FixedValue);
-      return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup,
-                                          Target, Log2Size, FixedValue);
-    }
+  // struct nlist (12 bytes)
 
-    // Get the symbol data, if any.
-    MCSymbolData *SD = 0;
-    if (Target.getSymA())
-      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+  Write32(MSD.StringIndex);
+  Write8(Type);
+  Write8(MSD.SectionIndex);
 
-    // FIXME: For other platforms, we need to use scattered relocations for
-    // internal relocations with offsets.  If this is an internal relocation
-    // with an offset, it also needs a scattered relocation entry.
-    //
-    // Is this right for ARM?
-    uint32_t Offset = Target.getConstant();
-    if (IsPCRel && RelocType == macho::RIT_Vanilla)
-      Offset += 1 << Log2Size;
-    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
-      return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target,
-                                          Log2Size, FixedValue);
-
-    // See <reloc.h>.
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned Index = 0;
-    unsigned IsExtern = 0;
-    unsigned Type = 0;
-
-    if (Target.isAbsolute()) { // constant
-      // FIXME!
-      report_fatal_error("FIXME: relocations to absolute targets "
-                         "not yet implemented");
-    } else {
-      // Resolve constant variables.
-      if (SD->getSymbol().isVariable()) {
-        int64_t Res;
-        if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
-              Res, Layout, SectionAddress)) {
-          FixedValue = Res;
-          return;
-        }
-      }
+  // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
+  // value.
+  Write16(Flags);
+  if (is64Bit())
+    Write64(Address);
+  else
+    Write32(Address);
+}
 
-      // Check whether we need an external or internal relocation.
-      if (doesSymbolRequireExternRelocation(SD)) {
-        IsExtern = 1;
-        Index = SD->getIndex();
-        // For external relocations, make sure to offset the fixup value to
-        // compensate for the addend of the symbol address, if it was
-        // undefined. This occurs with weak definitions, for example.
-        if (!SD->Symbol->isUndefined())
-          FixedValue -= Layout.getSymbolOffset(SD);
-      } else {
-        // The index is the section ordinal (1-based).
-        const MCSectionData &SymSD = Asm.getSectionData(
-          SD->getSymbol().getSection());
-        Index = SymSD.getOrdinal() + 1;
-        FixedValue += getSectionAddress(&SymSD);
-      }
-      if (IsPCRel)
-        FixedValue -= getSectionAddress(Fragment->getParent());
+void MachObjectWriter::RecordX86_64Relocation(const MCAssembler &Asm,
+                                              const MCAsmLayout &Layout,
+                                              const MCFragment *Fragment,
+                                              const MCFixup &Fixup,
+                                              MCValue Target,
+                                              uint64_t &FixedValue) {
+  unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+  // See <reloc.h>.
+  uint32_t FixupOffset =
+    Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+  uint32_t FixupAddress =
+    getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+  int64_t Value = 0;
+  unsigned Index = 0;
+  unsigned IsExtern = 0;
+  unsigned Type = 0;
+
+  Value = Target.getConstant();
+
+  if (IsPCRel) {
+    // Compensate for the relocation offset, Darwin x86_64 relocations only have
+    // the addend and appear to have attempted to define it to be the actual
+    // expression addend without the PCrel bias. However, instructions with data
+    // following the relocation are not accommodated for (see comment below
+    // regarding SIGNED{1,2,4}), so it isn't exactly that either.
+    Value += 1LL << Log2Size;
+  }
+
+  if (Target.isAbsolute()) { // constant
+    // SymbolNum of 0 indicates the absolute section.
+    Type = macho::RIT_X86_64_Unsigned;
+    Index = 0;
+
+    // FIXME: I believe this is broken, I don't think the linker can understand
+    // it. I think it would require a local relocation, but I'm not sure if that
+    // would work either. The official way to get an absolute PCrel relocation
+    // is to use an absolute symbol (which we don't support yet).
+    if (IsPCRel) {
+      IsExtern = 1;
+      Type = macho::RIT_X86_64_Branch;
+    }
+  } else if (Target.getSymB()) { // A - B + constant
+    const MCSymbol *A = &Target.getSymA()->getSymbol();
+    MCSymbolData &A_SD = Asm.getSymbolData(*A);
+    const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+
+    const MCSymbol *B = &Target.getSymB()->getSymbol();
+    MCSymbolData &B_SD = Asm.getSymbolData(*B);
+    const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
 
-      // The type is determined by the fixup kind.
-      Type = RelocType;
+    // Neither symbol can be modified.
+    if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
+        Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+      report_fatal_error("unsupported relocation of modified symbol");
+
+    // We don't support PCrel relocations of differences. Darwin 'as' doesn't
+    // implement most of these correctly.
+    if (IsPCRel)
+      report_fatal_error("unsupported pc-relative relocation of difference");
+
+    // The support for the situation where one or both of the symbols would
+    // require a local relocation is handled just like if the symbols were
+    // external.  This is certainly used in the case of debug sections where the
+    // section has only temporary symbols and thus the symbols don't have base
+    // symbols.  This is encoded using the section ordinal and non-extern
+    // relocation entries.
+
+    // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
+    // single SIGNED relocation); reject it for now.  Except the case where both
+    // symbols don't have a base, equal but both NULL.
+    if (A_Base == B_Base && A_Base)
+      report_fatal_error("unsupported relocation with identical base");
+
+    Value += getSymbolAddress(&A_SD, Layout) -
+      (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout));
+    Value -= getSymbolAddress(&B_SD, Layout) -
+      (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout));
+
+    if (A_Base) {
+      Index = A_Base->getIndex();
+      IsExtern = 1;
+    }
+    else {
+      Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
     }
+    Type = macho::RIT_X86_64_Unsigned;
 
-    // struct relocation_info (8 bytes)
     macho::RelocationEntry MRE;
     MRE.Word0 = FixupOffset;
     MRE.Word1 = ((Index     <<  0) |
@@ -1098,523 +655,1092 @@ public:
                  (IsExtern  << 27) |
                  (Type      << 28));
     Relocations[Fragment->getParent()].push_back(MRE);
-  }
 
-  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                        const MCFragment *Fragment, const MCFixup &Fixup,
-                        MCValue Target, uint64_t &FixedValue) {
-    // FIXME: These needs to be factored into the target Mach-O writer.
-    if (isARM()) {
-      RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
-      return;
+    if (B_Base) {
+      Index = B_Base->getIndex();
+      IsExtern = 1;
     }
-    if (is64Bit()) {
-      RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
-      return;
+    else {
+      Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
     }
-
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-
-    // If this is a 32-bit TLVP reloc it's handled a bit differently.
-    if (Target.getSymA() &&
-        Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
-      RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
-      return;
+    Type = macho::RIT_X86_64_Subtractor;
+  } else {
+    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+    MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+    const MCSymbolData *Base = Asm.getAtom(&SD);
+
+    // Relocations inside debug sections always use local relocations when
+    // possible. This seems to be done because the debugger doesn't fully
+    // understand x86_64 relocation entries, and expects to find values that
+    // have already been fixed up.
+    if (Symbol->isInSection()) {
+      const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+        Fragment->getParent()->getSection());
+      if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+        Base = 0;
     }
 
-    // If this is a difference or a defined symbol plus an offset, then we need
-    // a scattered relocation entry.
-    // Differences always require scattered relocations.
-    if (Target.getSymB())
-        return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
-                                         Target, Log2Size, FixedValue);
+    // x86_64 almost always uses external relocations, except when there is no
+    // symbol to use as a base address (a local symbol with no preceding
+    // non-local symbol).
+    if (Base) {
+      Index = Base->getIndex();
+      IsExtern = 1;
+
+      // Add the local offset, if needed.
+      if (Base != &SD)
+        Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+    } else if (Symbol->isInSection() && !Symbol->isVariable()) {
+      // The index is the section ordinal (1-based).
+      Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
+      Value += getSymbolAddress(&SD, Layout);
 
-    // Get the symbol data, if any.
-    MCSymbolData *SD = 0;
-    if (Target.getSymA())
-      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
-
-    // If this is an internal relocation with an offset, it also needs a
-    // scattered relocation entry.
-    uint32_t Offset = Target.getConstant();
-    if (IsPCRel)
-      Offset += 1 << Log2Size;
-    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
-      return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
-                                       Target, Log2Size, FixedValue);
-
-    // See <reloc.h>.
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned Index = 0;
-    unsigned IsExtern = 0;
-    unsigned Type = 0;
-
-    if (Target.isAbsolute()) { // constant
-      // SymbolNum of 0 indicates the absolute section.
-      //
-      // FIXME: Currently, these are never generated (see code below). I cannot
-      // find a case where they are actually emitted.
-      Type = macho::RIT_Vanilla;
-    } else {
-      // Resolve constant variables.
-      if (SD->getSymbol().isVariable()) {
-        int64_t Res;
-        if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
-              Res, Layout, SectionAddress)) {
-          FixedValue = Res;
-          return;
-        }
+      if (IsPCRel)
+        Value -= FixupAddress + (1 << Log2Size);
+    } else if (Symbol->isVariable()) {
+      const MCExpr *Value = Symbol->getVariableValue();
+      int64_t Res;
+      bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress);
+      if (isAbs) {
+        FixedValue = Res;
+        return;
+      } else {
+        report_fatal_error("unsupported relocation of variable '" +
+                           Symbol->getName() + "'");
       }
+    } else {
+      report_fatal_error("unsupported relocation of undefined symbol '" +
+                         Symbol->getName() + "'");
+    }
 
-      // Check whether we need an external or internal relocation.
-      if (doesSymbolRequireExternRelocation(SD)) {
-        IsExtern = 1;
-        Index = SD->getIndex();
-        // For external relocations, make sure to offset the fixup value to
-        // compensate for the addend of the symbol address, if it was
-        // undefined. This occurs with weak definitions, for example.
-        if (!SD->Symbol->isUndefined())
-          FixedValue -= Layout.getSymbolOffset(SD);
+    MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
+    if (IsPCRel) {
+      if (IsRIPRel) {
+        if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+          // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
+          // rewrite the movq to an leaq at link time if the symbol ends up in
+          // the same linkage unit.
+          if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
+            Type = macho::RIT_X86_64_GOTLoad;
+          else
+            Type = macho::RIT_X86_64_GOT;
+        }  else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+          Type = macho::RIT_X86_64_TLV;
+        }  else if (Modifier != MCSymbolRefExpr::VK_None) {
+          report_fatal_error("unsupported symbol modifier in relocation");
+        } else {
+          Type = macho::RIT_X86_64_Signed;
+
+          // The Darwin x86_64 relocation format has a problem where it cannot
+          // encode an address (L<foo> + <constant>) which is outside the atom
+          // containing L<foo>. Generally, this shouldn't occur but it does
+          // happen when we have a RIPrel instruction with data following the
+          // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+          // adjustment Darwin x86_64 uses, the offset is still negative and the
+          // linker has no way to recognize this.
+          //
+          // To work around this, Darwin uses several special relocation types
+          // to indicate the offsets. However, the specification or
+          // implementation of these seems to also be incomplete; they should
+          // adjust the addend as well based on the actual encoded instruction
+          // (the additional bias), but instead appear to just look at the final
+          // offset.
+          switch (-(Target.getConstant() + (1LL << Log2Size))) {
+          case 1: Type = macho::RIT_X86_64_Signed1; break;
+          case 2: Type = macho::RIT_X86_64_Signed2; break;
+          case 4: Type = macho::RIT_X86_64_Signed4; break;
+          }
+        }
       } else {
-        // The index is the section ordinal (1-based).
-        const MCSectionData &SymSD = Asm.getSectionData(
-          SD->getSymbol().getSection());
-        Index = SymSD.getOrdinal() + 1;
-        FixedValue += getSectionAddress(&SymSD);
-      }
-      if (IsPCRel)
-        FixedValue -= getSectionAddress(Fragment->getParent());
+        if (Modifier != MCSymbolRefExpr::VK_None)
+          report_fatal_error("unsupported symbol modifier in branch "
+                             "relocation");
 
-      Type = macho::RIT_Vanilla;
+        Type = macho::RIT_X86_64_Branch;
+      }
+    } else {
+      if (Modifier == MCSymbolRefExpr::VK_GOT) {
+        Type = macho::RIT_X86_64_GOT;
+      } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+        // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
+        // case all we do is set the PCrel bit in the relocation entry; this is
+        // used with exception handling, for example. The source is required to
+        // include any necessary offset directly.
+        Type = macho::RIT_X86_64_GOT;
+        IsPCRel = 1;
+      } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+        report_fatal_error("TLVP symbol modifier should have been rip-rel");
+      } else if (Modifier != MCSymbolRefExpr::VK_None)
+        report_fatal_error("unsupported symbol modifier in relocation");
+      else
+        Type = macho::RIT_X86_64_Unsigned;
     }
+  }
+
+  // x86_64 always writes custom values into the fixups.
+  FixedValue = Value;
+
+  // struct relocation_info (8 bytes)
+  macho::RelocationEntry MRE;
+  MRE.Word0 = FixupOffset;
+  MRE.Word1 = ((Index     <<  0) |
+               (IsPCRel   << 24) |
+               (Log2Size  << 25) |
+               (IsExtern  << 27) |
+               (Type      << 28));
+  Relocations[Fragment->getParent()].push_back(MRE);
+}
 
-    // struct relocation_info (8 bytes)
+void MachObjectWriter::RecordScatteredRelocation(const MCAssembler &Asm,
+                                                 const MCAsmLayout &Layout,
+                                                 const MCFragment *Fragment,
+                                                 const MCFixup &Fixup,
+                                                 MCValue Target,
+                                                 unsigned Log2Size,
+                                                 uint64_t &FixedValue) {
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Type = macho::RIT_Vanilla;
+
+  // See <reloc.h>.
+  const MCSymbol *A = &Target.getSymA()->getSymbol();
+  MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+  if (!A_SD->getFragment())
+    report_fatal_error("symbol '" + A->getName() +
+                       "' can not be undefined in a subtraction expression");
+
+  uint32_t Value = getSymbolAddress(A_SD, Layout);
+  uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+  FixedValue += SecAddr;
+  uint32_t Value2 = 0;
+
+  if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+    if (!B_SD->getFragment())
+      report_fatal_error("symbol '" + B->getSymbol().getName() +
+                         "' can not be undefined in a subtraction expression");
+
+    // Select the appropriate difference relocation type.
+    //
+    // Note that there is no longer any semantic difference between these two
+    // relocation types from the linkers point of view, this is done solely for
+    // pedantic compatibility with 'as'.
+    Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+      (unsigned)macho::RIT_Generic_LocalDifference;
+    Value2 = getSymbolAddress(B_SD, Layout);
+    FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
+  }
+
+  // Relocations are written out in reverse order, so the PAIR comes first.
+  if (Type == macho::RIT_Difference ||
+      Type == macho::RIT_Generic_LocalDifference) {
     macho::RelocationEntry MRE;
-    MRE.Word0 = FixupOffset;
-    MRE.Word1 = ((Index     <<  0) |
-                 (IsPCRel   << 24) |
-                 (Log2Size  << 25) |
-                 (IsExtern  << 27) |
-                 (Type      << 28));
+    MRE.Word0 = ((0         <<  0) |
+                 (macho::RIT_Pair  << 24) |
+                 (Log2Size  << 28) |
+                 (IsPCRel   << 30) |
+                 macho::RF_Scattered);
+    MRE.Word1 = Value2;
     Relocations[Fragment->getParent()].push_back(MRE);
   }
 
-  void BindIndirectSymbols(MCAssembler &Asm) {
-    // This is the point where 'as' creates actual symbols for indirect symbols
-    // (in the following two passes). It would be easier for us to do this
-    // sooner when we see the attribute, but that makes getting the order in the
-    // symbol table much more complicated than it is worth.
-    //
-    // FIXME: Revisit this when the dust settles.
+  macho::RelocationEntry MRE;
+  MRE.Word0 = ((FixupOffset <<  0) |
+               (Type        << 24) |
+               (Log2Size    << 28) |
+               (IsPCRel     << 30) |
+               macho::RF_Scattered);
+  MRE.Word1 = Value;
+  Relocations[Fragment->getParent()].push_back(MRE);
+}
 
-    // Bind non lazy symbol pointers first.
-    unsigned IndirectIndex = 0;
-    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
-      const MCSectionMachO &Section =
-        cast<MCSectionMachO>(it->SectionData->getSection());
+void MachObjectWriter::RecordARMScatteredRelocation(const MCAssembler &Asm,
+                                                    const MCAsmLayout &Layout,
+                                                    const MCFragment *Fragment,
+                                                    const MCFixup &Fixup,
+                                                    MCValue Target,
+                                                    unsigned Log2Size,
+                                                    uint64_t &FixedValue) {
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Type = macho::RIT_Vanilla;
+
+  // See <reloc.h>.
+  const MCSymbol *A = &Target.getSymA()->getSymbol();
+  MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+  if (!A_SD->getFragment())
+    report_fatal_error("symbol '" + A->getName() +
+                       "' can not be undefined in a subtraction expression");
+
+  uint32_t Value = getSymbolAddress(A_SD, Layout);
+  uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+  FixedValue += SecAddr;
+  uint32_t Value2 = 0;
+
+  if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+    if (!B_SD->getFragment())
+      report_fatal_error("symbol '" + B->getSymbol().getName() +
+                         "' can not be undefined in a subtraction expression");
+
+    // Select the appropriate difference relocation type.
+    Type = macho::RIT_Difference;
+    Value2 = getSymbolAddress(B_SD, Layout);
+    FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
+  }
 
-      if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
-        continue;
+  // Relocations are written out in reverse order, so the PAIR comes first.
+  if (Type == macho::RIT_Difference ||
+      Type == macho::RIT_Generic_LocalDifference) {
+    macho::RelocationEntry MRE;
+    MRE.Word0 = ((0         <<  0) |
+                 (macho::RIT_Pair  << 24) |
+                 (Log2Size  << 28) |
+                 (IsPCRel   << 30) |
+                 macho::RF_Scattered);
+    MRE.Word1 = Value2;
+    Relocations[Fragment->getParent()].push_back(MRE);
+  }
 
-      // Initialize the section indirect symbol base, if necessary.
-      if (!IndirectSymBase.count(it->SectionData))
-        IndirectSymBase[it->SectionData] = IndirectIndex;
+  macho::RelocationEntry MRE;
+  MRE.Word0 = ((FixupOffset <<  0) |
+               (Type        << 24) |
+               (Log2Size    << 28) |
+               (IsPCRel     << 30) |
+               macho::RF_Scattered);
+  MRE.Word1 = Value;
+  Relocations[Fragment->getParent()].push_back(MRE);
+}
 
-      Asm.getOrCreateSymbolData(*it->Symbol);
-    }
+void MachObjectWriter::RecordARMMovwMovtRelocation(const MCAssembler &Asm,
+                                                   const MCAsmLayout &Layout,
+                                                   const MCFragment *Fragment,
+                                                   const MCFixup &Fixup,
+                                                   MCValue Target,
+                                                   uint64_t &FixedValue) {
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Type = macho::RIT_ARM_Half;
+
+  // See <reloc.h>.
+  const MCSymbol *A = &Target.getSymA()->getSymbol();
+  MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+  if (!A_SD->getFragment())
+    report_fatal_error("symbol '" + A->getName() +
+                       "' can not be undefined in a subtraction expression");
+
+  uint32_t Value = getSymbolAddress(A_SD, Layout);
+  uint32_t Value2 = 0;
+  uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+  FixedValue += SecAddr;
+
+  if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+    if (!B_SD->getFragment())
+      report_fatal_error("symbol '" + B->getSymbol().getName() +
+                         "' can not be undefined in a subtraction expression");
+
+    // Select the appropriate difference relocation type.
+    Type = macho::RIT_ARM_HalfDifference;
+    Value2 = getSymbolAddress(B_SD, Layout);
+    FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
+  }
 
-    // Then lazy symbol pointers and symbol stubs.
-    IndirectIndex = 0;
-    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
-      const MCSectionMachO &Section =
-        cast<MCSectionMachO>(it->SectionData->getSection());
-
-      if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
-          Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
-        continue;
-
-      // Initialize the section indirect symbol base, if necessary.
-      if (!IndirectSymBase.count(it->SectionData))
-        IndirectSymBase[it->SectionData] = IndirectIndex;
-
-      // Set the symbol type to undefined lazy, but only on construction.
-      //
-      // FIXME: Do not hardcode.
-      bool Created;
-      MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
-      if (Created)
-        Entry.setFlags(Entry.getFlags() | 0x0001);
-    }
+  // Relocations are written out in reverse order, so the PAIR comes first.
+  // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+  //
+  // For these two r_type relocations they always have a pair following them and
+  // the r_length bits are used differently.  The encoding of the r_length is as
+  // follows:
+  //   low bit of r_length:
+  //      0 - :lower16: for movw instructions
+  //      1 - :upper16: for movt instructions
+  //   high bit of r_length:
+  //      0 - arm instructions
+  //      1 - thumb instructions
+  // the other half of the relocated expression is in the following pair
+  // relocation entry in the the low 16 bits of r_address field.
+  unsigned ThumbBit = 0;
+  unsigned MovtBit = 0;
+  switch ((unsigned)Fixup.getKind()) {
+  default: break;
+  case ARM::fixup_arm_movt_hi16:
+  case ARM::fixup_arm_movt_hi16_pcrel:
+    MovtBit = 1;
+    break;
+  case ARM::fixup_t2_movt_hi16:
+  case ARM::fixup_t2_movt_hi16_pcrel:
+    MovtBit = 1;
+    // Fallthrough
+  case ARM::fixup_t2_movw_lo16:
+  case ARM::fixup_t2_movw_lo16_pcrel:
+    ThumbBit = 1;
+    break;
   }
 
-  /// ComputeSymbolTable - Compute the symbol table data
-  ///
-  /// \param StringTable [out] - The string table data.
-  /// \param StringIndexMap [out] - Map from symbol names to offsets in the
-  /// string table.
-  void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
-                          std::vector<MachSymbolData> &LocalSymbolData,
-                          std::vector<MachSymbolData> &ExternalSymbolData,
-                          std::vector<MachSymbolData> &UndefinedSymbolData) {
-    // Build section lookup table.
-    DenseMap<const MCSection*, uint8_t> SectionIndexMap;
-    unsigned Index = 1;
-    for (MCAssembler::iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it, ++Index)
-      SectionIndexMap[&it->getSection()] = Index;
-    assert(Index <= 256 && "Too many sections!");
-
-    // Index 0 is always the empty string.
-    StringMap<uint64_t> StringIndexMap;
-    StringTable += '\x00';
 
-    // Build the symbol arrays and the string table, but only for non-local
-    // symbols.
-    //
-    // The particular order that we collect the symbols and create the string
-    // table, then sort the symbols is chosen to match 'as'. Even though it
-    // doesn't matter for correctness, this is important for letting us diff .o
-    // files.
-    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-           ie = Asm.symbol_end(); it != ie; ++it) {
-      const MCSymbol &Symbol = it->getSymbol();
-
-      // Ignore non-linker visible symbols.
-      if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
-        continue;
-
-      if (!it->isExternal() && !Symbol.isUndefined())
-        continue;
-
-      uint64_t &Entry = StringIndexMap[Symbol.getName()];
-      if (!Entry) {
-        Entry = StringTable.size();
-        StringTable += Symbol.getName();
-        StringTable += '\x00';
-      }
+  if (Type == macho::RIT_ARM_HalfDifference) {
+    uint32_t OtherHalf = MovtBit
+      ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
 
-      MachSymbolData MSD;
-      MSD.SymbolData = it;
-      MSD.StringIndex = Entry;
+    macho::RelocationEntry MRE;
+    MRE.Word0 = ((OtherHalf       <<  0) |
+                 (macho::RIT_Pair << 24) |
+                 (MovtBit         << 28) |
+                 (ThumbBit        << 29) |
+                 (IsPCRel         << 30) |
+                 macho::RF_Scattered);
+    MRE.Word1 = Value2;
+    Relocations[Fragment->getParent()].push_back(MRE);
+  }
 
-      if (Symbol.isUndefined()) {
-        MSD.SectionIndex = 0;
-        UndefinedSymbolData.push_back(MSD);
-      } else if (Symbol.isAbsolute()) {
-        MSD.SectionIndex = 0;
-        ExternalSymbolData.push_back(MSD);
-      } else {
-        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
-        assert(MSD.SectionIndex && "Invalid section index!");
-        ExternalSymbolData.push_back(MSD);
-      }
-    }
+  macho::RelocationEntry MRE;
+  MRE.Word0 = ((FixupOffset <<  0) |
+               (Type        << 24) |
+               (MovtBit     << 28) |
+               (ThumbBit    << 29) |
+               (IsPCRel     << 30) |
+               macho::RF_Scattered);
+  MRE.Word1 = Value;
+  Relocations[Fragment->getParent()].push_back(MRE);
+}
 
-    // Now add the data for local symbols.
-    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-           ie = Asm.symbol_end(); it != ie; ++it) {
-      const MCSymbol &Symbol = it->getSymbol();
+void MachObjectWriter::RecordTLVPRelocation(const MCAssembler &Asm,
+                                            const MCAsmLayout &Layout,
+                                            const MCFragment *Fragment,
+                                            const MCFixup &Fixup,
+                                            MCValue Target,
+                                            uint64_t &FixedValue) {
+  assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
+         !is64Bit() &&
+         "Should only be called with a 32-bit TLVP relocation!");
+
+  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+  uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned IsPCRel = 0;
+
+  // Get the symbol data.
+  MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+  unsigned Index = SD_A->getIndex();
+
+  // We're only going to have a second symbol in pic mode and it'll be a
+  // subtraction from the picbase. For 32-bit pic the addend is the difference
+  // between the picbase and the next address.  For 32-bit static the addend is
+  // zero.
+  if (Target.getSymB()) {
+    // If this is a subtraction then we're pcrel.
+    uint32_t FixupAddress =
+      getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+    MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+    IsPCRel = 1;
+    FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) +
+                  Target.getConstant());
+    FixedValue += 1ULL << Log2Size;
+  } else {
+    FixedValue = 0;
+  }
 
-      // Ignore non-linker visible symbols.
-      if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
-        continue;
+  // struct relocation_info (8 bytes)
+  macho::RelocationEntry MRE;
+  MRE.Word0 = Value;
+  MRE.Word1 = ((Index                  <<  0) |
+               (IsPCRel                << 24) |
+               (Log2Size               << 25) |
+               (1                      << 27) | // Extern
+               (macho::RIT_Generic_TLV << 28)); // Type
+  Relocations[Fragment->getParent()].push_back(MRE);
+}
 
-      if (it->isExternal() || Symbol.isUndefined())
-        continue;
+bool MachObjectWriter::getARMFixupKindMachOInfo(unsigned Kind,
+                                                unsigned &RelocType,
+                                                unsigned &Log2Size) {
+  RelocType = unsigned(macho::RIT_Vanilla);
+  Log2Size = ~0U;
 
-      uint64_t &Entry = StringIndexMap[Symbol.getName()];
-      if (!Entry) {
-        Entry = StringTable.size();
-        StringTable += Symbol.getName();
-        StringTable += '\x00';
-      }
+  switch (Kind) {
+  default:
+    return false;
 
-      MachSymbolData MSD;
-      MSD.SymbolData = it;
-      MSD.StringIndex = Entry;
+  case FK_Data_1:
+    Log2Size = llvm::Log2_32(1);
+    return true;
+  case FK_Data_2:
+    Log2Size = llvm::Log2_32(2);
+    return true;
+  case FK_Data_4:
+    Log2Size = llvm::Log2_32(4);
+    return true;
+  case FK_Data_8:
+    Log2Size = llvm::Log2_32(8);
+    return true;
 
-      if (Symbol.isAbsolute()) {
-        MSD.SectionIndex = 0;
-        LocalSymbolData.push_back(MSD);
-      } else {
-        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
-        assert(MSD.SectionIndex && "Invalid section index!");
-        LocalSymbolData.push_back(MSD);
-      }
-    }
+    // Handle 24-bit branch kinds.
+  case ARM::fixup_arm_ldst_pcrel_12:
+  case ARM::fixup_arm_pcrel_10:
+  case ARM::fixup_arm_adr_pcrel_12:
+  case ARM::fixup_arm_condbranch:
+  case ARM::fixup_arm_uncondbranch:
+    RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+    // Report as 'long', even though that is not quite accurate.
+    Log2Size = llvm::Log2_32(4);
+    return true;
 
-    // External and undefined symbols are required to be in lexicographic order.
-    std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
-    std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+    // Handle Thumb branches.
+  case ARM::fixup_arm_thumb_br:
+    RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+    Log2Size = llvm::Log2_32(2);
+    return true;
+      
+  case ARM::fixup_arm_thumb_bl:
+  case ARM::fixup_arm_thumb_blx:
+    RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+    Log2Size = llvm::Log2_32(4);
+    return true;
 
-    // Set the symbol indices.
-    Index = 0;
-    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
-      LocalSymbolData[i].SymbolData->setIndex(Index++);
-    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
-      ExternalSymbolData[i].SymbolData->setIndex(Index++);
-    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
-      UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+  case ARM::fixup_arm_movt_hi16:
+  case ARM::fixup_arm_movt_hi16_pcrel:
+  case ARM::fixup_t2_movt_hi16:
+  case ARM::fixup_t2_movt_hi16_pcrel:
+    RelocType = unsigned(macho::RIT_ARM_HalfDifference);
+    // Report as 'long', even though that is not quite accurate.
+    Log2Size = llvm::Log2_32(4);
+    return true;
 
-    // The string table is padded to a multiple of 4.
-    while (StringTable.size() % 4)
-      StringTable += '\x00';
+  case ARM::fixup_arm_movw_lo16:
+  case ARM::fixup_arm_movw_lo16_pcrel:
+  case ARM::fixup_t2_movw_lo16:
+  case ARM::fixup_t2_movw_lo16_pcrel:
+    RelocType = unsigned(macho::RIT_ARM_Half);
+    // Report as 'long', even though that is not quite accurate.
+    Log2Size = llvm::Log2_32(4);
+    return true;
+  }
+}
+void MachObjectWriter::RecordARMRelocation(const MCAssembler &Asm,
+                                           const MCAsmLayout &Layout,
+                                           const MCFragment *Fragment,
+                                           const MCFixup &Fixup,
+                                           MCValue Target,
+                                           uint64_t &FixedValue) {
+  unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Log2Size;
+  unsigned RelocType = macho::RIT_Vanilla;
+  if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+    report_fatal_error("unknown ARM fixup kind!");
+    return;
   }
 
-  void computeSectionAddresses(const MCAssembler &Asm,
-                               const MCAsmLayout &Layout) {
-    uint64_t StartAddress = 0;
-    const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
-    for (int i = 0, n = Order.size(); i != n ; ++i) {
-      const MCSectionData *SD = Order[i];
-      StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
-      SectionAddress[SD] = StartAddress;
-      StartAddress += Layout.getSectionAddressSize(SD);
-      // Explicitly pad the section to match the alignment requirements of the
-      // following one. This is for 'gas' compatibility, it shouldn't
-      /// strictly be necessary.
-      StartAddress += getPaddingSize(SD, Layout);
-    }
+  // If this is a difference or a defined symbol plus an offset, then we need a
+  // scattered relocation entry.  Differences always require scattered
+  // relocations.
+  if (Target.getSymB()) {
+    if (RelocType == macho::RIT_ARM_Half ||
+        RelocType == macho::RIT_ARM_HalfDifference)
+      return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup,
+                                         Target, FixedValue);
+    return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup,
+                                        Target, Log2Size, FixedValue);
   }
 
-  void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) {
-    computeSectionAddresses(Asm, Layout);
+  // Get the symbol data, if any.
+  MCSymbolData *SD = 0;
+  if (Target.getSymA())
+    SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
 
-    // Create symbol data for any indirect symbols.
-    BindIndirectSymbols(Asm);
+  // FIXME: For other platforms, we need to use scattered relocations for
+  // internal relocations with offsets.  If this is an internal relocation with
+  // an offset, it also needs a scattered relocation entry.
+  //
+  // Is this right for ARM?
+  uint32_t Offset = Target.getConstant();
+  if (IsPCRel && RelocType == macho::RIT_Vanilla)
+    Offset += 1 << Log2Size;
+  if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
+    return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target,
+                                        Log2Size, FixedValue);
+
+  // See <reloc.h>.
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned Index = 0;
+  unsigned IsExtern = 0;
+  unsigned Type = 0;
+
+  if (Target.isAbsolute()) { // constant
+    // FIXME!
+    report_fatal_error("FIXME: relocations to absolute targets "
+                       "not yet implemented");
+  } else {
+    // Resolve constant variables.
+    if (SD->getSymbol().isVariable()) {
+      int64_t Res;
+      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+            Res, Layout, SectionAddress)) {
+        FixedValue = Res;
+        return;
+      }
+    }
+
+    // Check whether we need an external or internal relocation.
+    if (doesSymbolRequireExternRelocation(SD)) {
+      IsExtern = 1;
+      Index = SD->getIndex();
+
+      // For external relocations, make sure to offset the fixup value to
+      // compensate for the addend of the symbol address, if it was
+      // undefined. This occurs with weak definitions, for example.
+      if (!SD->Symbol->isUndefined())
+        FixedValue -= Layout.getSymbolOffset(SD);
+    } else {
+      // The index is the section ordinal (1-based).
+      const MCSectionData &SymSD = Asm.getSectionData(
+        SD->getSymbol().getSection());
+      Index = SymSD.getOrdinal() + 1;
+      FixedValue += getSectionAddress(&SymSD);
+    }
+    if (IsPCRel)
+      FixedValue -= getSectionAddress(Fragment->getParent());
 
-    // Compute symbol table information and bind symbol indices.
-    ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
-                       UndefinedSymbolData);
+    // The type is determined by the fixup kind.
+    Type = RelocType;
   }
 
-  virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
-                                                      const MCSymbolData &DataA,
-                                                      const MCFragment &FB,
-                                                      bool InSet,
-                                                      bool IsPCRel) const {
-    if (InSet)
-      return true;
+  // struct relocation_info (8 bytes)
+  macho::RelocationEntry MRE;
+  MRE.Word0 = FixupOffset;
+  MRE.Word1 = ((Index     <<  0) |
+               (IsPCRel   << 24) |
+               (Log2Size  << 25) |
+               (IsExtern  << 27) |
+               (Type      << 28));
+  Relocations[Fragment->getParent()].push_back(MRE);
+}
 
-    // The effective address is
-    //     addr(atom(A)) + offset(A)
-    //   - addr(atom(B)) - offset(B)
-    // and the offsets are not relocatable, so the fixup is fully resolved when
-    //  addr(atom(A)) - addr(atom(B)) == 0.
-    const MCSymbolData *A_Base = 0, *B_Base = 0;
+void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
+                                        const MCAsmLayout &Layout,
+                                        const MCFragment *Fragment,
+                                        const MCFixup &Fixup,
+                                        MCValue Target,
+                                        uint64_t &FixedValue) {
+  // FIXME: These needs to be factored into the target Mach-O writer.
+  if (isARM()) {
+    RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
+    return;
+  }
+  if (is64Bit()) {
+    RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
+    return;
+  }
 
-    const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
-    const MCSection &SecA = SA.getSection();
-    const MCSection &SecB = FB.getParent()->getSection();
+  unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
 
-    if (IsPCRel) {
-      // The simple (Darwin, except on x86_64) way of dealing with this was to
-      // assume that any reference to a temporary symbol *must* be a temporary
-      // symbol in the same atom, unless the sections differ. Therefore, any
-      // PCrel relocation to a temporary symbol (in the same section) is fully
-      // resolved. This also works in conjunction with absolutized .set, which
-      // requires the compiler to use .set to absolutize the differences between
-      // symbols which the compiler knows to be assembly time constants, so we
-      // don't need to worry about considering symbol differences fully
-      // resolved.
-
-      if (!Asm.getBackend().hasReliableSymbolDifference()) {
-        if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
-          return false;
-        return true;
+  // If this is a 32-bit TLVP reloc it's handled a bit differently.
+  if (Target.getSymA() &&
+      Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+    RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
+    return;
+  }
+
+  // If this is a difference or a defined symbol plus an offset, then we need a
+  // scattered relocation entry. Differences always require scattered
+  // relocations.
+  if (Target.getSymB())
+    return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
+                                     Target, Log2Size, FixedValue);
+
+  // Get the symbol data, if any.
+  MCSymbolData *SD = 0;
+  if (Target.getSymA())
+    SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+  // If this is an internal relocation with an offset, it also needs a scattered
+  // relocation entry.
+  uint32_t Offset = Target.getConstant();
+  if (IsPCRel)
+    Offset += 1 << Log2Size;
+  if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
+    return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
+                                     Target, Log2Size, FixedValue);
+
+  // See <reloc.h>.
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned Index = 0;
+  unsigned IsExtern = 0;
+  unsigned Type = 0;
+
+  if (Target.isAbsolute()) { // constant
+    // SymbolNum of 0 indicates the absolute section.
+    //
+    // FIXME: Currently, these are never generated (see code below). I cannot
+    // find a case where they are actually emitted.
+    Type = macho::RIT_Vanilla;
+  } else {
+    // Resolve constant variables.
+    if (SD->getSymbol().isVariable()) {
+      int64_t Res;
+      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+            Res, Layout, SectionAddress)) {
+        FixedValue = Res;
+        return;
       }
+    }
+
+    // Check whether we need an external or internal relocation.
+    if (doesSymbolRequireExternRelocation(SD)) {
+      IsExtern = 1;
+      Index = SD->getIndex();
+      // For external relocations, make sure to offset the fixup value to
+      // compensate for the addend of the symbol address, if it was
+      // undefined. This occurs with weak definitions, for example.
+      if (!SD->Symbol->isUndefined())
+        FixedValue -= Layout.getSymbolOffset(SD);
     } else {
-      if (!TargetObjectWriter->useAggressiveSymbolFolding())
-        return false;
+      // The index is the section ordinal (1-based).
+      const MCSectionData &SymSD = Asm.getSectionData(
+        SD->getSymbol().getSection());
+      Index = SymSD.getOrdinal() + 1;
+      FixedValue += getSectionAddress(&SymSD);
     }
+    if (IsPCRel)
+      FixedValue -= getSectionAddress(Fragment->getParent());
 
-    const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
+    Type = macho::RIT_Vanilla;
+  }
 
-    A_Base = FA.getAtom();
-    if (!A_Base)
-      return false;
+  // struct relocation_info (8 bytes)
+  macho::RelocationEntry MRE;
+  MRE.Word0 = FixupOffset;
+  MRE.Word1 = ((Index     <<  0) |
+               (IsPCRel   << 24) |
+               (Log2Size  << 25) |
+               (IsExtern  << 27) |
+               (Type      << 28));
+  Relocations[Fragment->getParent()].push_back(MRE);
+}
 
-    B_Base = FB.getAtom();
-    if (!B_Base)
-      return false;
+void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
+  // This is the point where 'as' creates actual symbols for indirect symbols
+  // (in the following two passes). It would be easier for us to do this sooner
+  // when we see the attribute, but that makes getting the order in the symbol
+  // table much more complicated than it is worth.
+  //
+  // FIXME: Revisit this when the dust settles.
 
-    // If the atoms are the same, they are guaranteed to have the same address.
-    if (A_Base == B_Base)
-      return true;
+  // Bind non lazy symbol pointers first.
+  unsigned IndirectIndex = 0;
+  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+         ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+    const MCSectionMachO &Section =
+      cast<MCSectionMachO>(it->SectionData->getSection());
 
-    // Otherwise, we can't prove this is fully resolved.
-    return false;
+    if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+      continue;
+
+    // Initialize the section indirect symbol base, if necessary.
+    if (!IndirectSymBase.count(it->SectionData))
+      IndirectSymBase[it->SectionData] = IndirectIndex;
+
+    Asm.getOrCreateSymbolData(*it->Symbol);
   }
 
-  void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
-    unsigned NumSections = Asm.size();
-
-    // The section data starts after the header, the segment load command (and
-    // section headers) and the symbol table.
-    unsigned NumLoadCommands = 1;
-    uint64_t LoadCommandsSize = is64Bit() ?
-      macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
-      macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
-
-    // Add the symbol table load command sizes, if used.
-    unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
-      UndefinedSymbolData.size();
-    if (NumSymbols) {
-      NumLoadCommands += 2;
-      LoadCommandsSize += (macho::SymtabLoadCommandSize +
-                           macho::DysymtabLoadCommandSize);
-    }
+  // Then lazy symbol pointers and symbol stubs.
+  IndirectIndex = 0;
+  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+         ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+    const MCSectionMachO &Section =
+      cast<MCSectionMachO>(it->SectionData->getSection());
 
-    // Compute the total size of the section data, as well as its file size and
-    // vm size.
-    uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
-                                 macho::Header32Size) + LoadCommandsSize;
-    uint64_t SectionDataSize = 0;
-    uint64_t SectionDataFileSize = 0;
-    uint64_t VMSize = 0;
-    for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      const MCSectionData &SD = *it;
-      uint64_t Address = getSectionAddress(&SD);
-      uint64_t Size = Layout.getSectionAddressSize(&SD);
-      uint64_t FileSize = Layout.getSectionFileSize(&SD);
-      FileSize += getPaddingSize(&SD, Layout);
-
-      VMSize = std::max(VMSize, Address + Size);
-
-      if (SD.getSection().isVirtualSection())
-        continue;
-
-      SectionDataSize = std::max(SectionDataSize, Address + Size);
-      SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
-    }
+    if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+        Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
+      continue;
+
+    // Initialize the section indirect symbol base, if necessary.
+    if (!IndirectSymBase.count(it->SectionData))
+      IndirectSymBase[it->SectionData] = IndirectIndex;
 
-    // The section data is padded to 4 bytes.
+    // Set the symbol type to undefined lazy, but only on construction.
     //
-    // FIXME: Is this machine dependent?
-    unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
-    SectionDataFileSize += SectionDataPadding;
-
-    // Write the prolog, starting with the header and load command...
-    WriteHeader(NumLoadCommands, LoadCommandsSize,
-                Asm.getSubsectionsViaSymbols());
-    WriteSegmentLoadCommand(NumSections, VMSize,
-                            SectionDataStart, SectionDataSize);
-
-    // ... and then the section headers.
-    uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
-    for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
-      unsigned NumRelocs = Relocs.size();
-      uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
-      WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
-      RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+    // FIXME: Do not hardcode.
+    bool Created;
+    MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+    if (Created)
+      Entry.setFlags(Entry.getFlags() | 0x0001);
+  }
+}
+
+/// ComputeSymbolTable - Compute the symbol table data
+///
+/// \param StringTable [out] - The string table data.
+/// \param StringIndexMap [out] - Map from symbol names to offsets in the
+/// string table.
+void MachObjectWriter::
+ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+                   std::vector<MachSymbolData> &LocalSymbolData,
+                   std::vector<MachSymbolData> &ExternalSymbolData,
+                   std::vector<MachSymbolData> &UndefinedSymbolData) {
+  // Build section lookup table.
+  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+  unsigned Index = 1;
+  for (MCAssembler::iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it, ++Index)
+    SectionIndexMap[&it->getSection()] = Index;
+  assert(Index <= 256 && "Too many sections!");
+
+  // Index 0 is always the empty string.
+  StringMap<uint64_t> StringIndexMap;
+  StringTable += '\x00';
+
+  // Build the symbol arrays and the string table, but only for non-local
+  // symbols.
+  //
+  // The particular order that we collect the symbols and create the string
+  // table, then sort the symbols is chosen to match 'as'. Even though it
+  // doesn't matter for correctness, this is important for letting us diff .o
+  // files.
+  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+         ie = Asm.symbol_end(); it != ie; ++it) {
+    const MCSymbol &Symbol = it->getSymbol();
+
+    // Ignore non-linker visible symbols.
+    if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+      continue;
+
+    if (!it->isExternal() && !Symbol.isUndefined())
+      continue;
+
+    uint64_t &Entry = StringIndexMap[Symbol.getName()];
+    if (!Entry) {
+      Entry = StringTable.size();
+      StringTable += Symbol.getName();
+      StringTable += '\x00';
     }
 
-    // Write the symbol table load command, if used.
-    if (NumSymbols) {
-      unsigned FirstLocalSymbol = 0;
-      unsigned NumLocalSymbols = LocalSymbolData.size();
-      unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
-      unsigned NumExternalSymbols = ExternalSymbolData.size();
-      unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
-      unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
-      unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
-      unsigned NumSymTabSymbols =
-        NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
-      uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
-      uint64_t IndirectSymbolOffset = 0;
-
-      // If used, the indirect symbols are written after the section data.
-      if (NumIndirectSymbols)
-        IndirectSymbolOffset = RelocTableEnd;
-
-      // The symbol table is written after the indirect symbol data.
-      uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
-
-      // The string table is written after symbol table.
-      uint64_t StringTableOffset =
-        SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
-                                                macho::Nlist32Size);
-      WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
-                             StringTableOffset, StringTable.size());
-
-      WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
-                               FirstExternalSymbol, NumExternalSymbols,
-                               FirstUndefinedSymbol, NumUndefinedSymbols,
-                               IndirectSymbolOffset, NumIndirectSymbols);
+    MachSymbolData MSD;
+    MSD.SymbolData = it;
+    MSD.StringIndex = Entry;
+
+    if (Symbol.isUndefined()) {
+      MSD.SectionIndex = 0;
+      UndefinedSymbolData.push_back(MSD);
+    } else if (Symbol.isAbsolute()) {
+      MSD.SectionIndex = 0;
+      ExternalSymbolData.push_back(MSD);
+    } else {
+      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+      assert(MSD.SectionIndex && "Invalid section index!");
+      ExternalSymbolData.push_back(MSD);
     }
+  }
+
+  // Now add the data for local symbols.
+  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+         ie = Asm.symbol_end(); it != ie; ++it) {
+    const MCSymbol &Symbol = it->getSymbol();
+
+    // Ignore non-linker visible symbols.
+    if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+      continue;
 
-    // Write the actual section data.
-    for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      Asm.WriteSectionData(it, Layout);
+    if (it->isExternal() || Symbol.isUndefined())
+      continue;
 
-      uint64_t Pad = getPaddingSize(it, Layout);
-      for (unsigned int i = 0; i < Pad; ++i)
-        Write8(0);
+    uint64_t &Entry = StringIndexMap[Symbol.getName()];
+    if (!Entry) {
+      Entry = StringTable.size();
+      StringTable += Symbol.getName();
+      StringTable += '\x00';
     }
 
-    // Write the extra padding.
-    WriteZeros(SectionDataPadding);
-
-    // Write the relocation entries.
-    for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      // Write the section relocation entries, in reverse order to match 'as'
-      // (approximately, the exact algorithm is more complicated than this).
-      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
-      for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
-        Write32(Relocs[e - i - 1].Word0);
-        Write32(Relocs[e - i - 1].Word1);
-      }
+    MachSymbolData MSD;
+    MSD.SymbolData = it;
+    MSD.StringIndex = Entry;
+
+    if (Symbol.isAbsolute()) {
+      MSD.SectionIndex = 0;
+      LocalSymbolData.push_back(MSD);
+    } else {
+      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+      assert(MSD.SectionIndex && "Invalid section index!");
+      LocalSymbolData.push_back(MSD);
     }
+  }
 
-    // Write the symbol table data, if used.
-    if (NumSymbols) {
-      // Write the indirect symbol entries.
-      for (MCAssembler::const_indirect_symbol_iterator
-             it = Asm.indirect_symbol_begin(),
-             ie = Asm.indirect_symbol_end(); it != ie; ++it) {
-        // Indirect symbols in the non lazy symbol pointer section have some
-        // special handling.
-        const MCSectionMachO &Section =
-          static_cast<const MCSectionMachO&>(it->SectionData->getSection());
-        if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
-          // If this symbol is defined and internal, mark it as such.
-          if (it->Symbol->isDefined() &&
-              !Asm.getSymbolData(*it->Symbol).isExternal()) {
-            uint32_t Flags = macho::ISF_Local;
-            if (it->Symbol->isAbsolute())
-              Flags |= macho::ISF_Absolute;
-            Write32(Flags);
-            continue;
-          }
-        }
+  // External and undefined symbols are required to be in lexicographic order.
+  std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+  std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+  // Set the symbol indices.
+  Index = 0;
+  for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+    LocalSymbolData[i].SymbolData->setIndex(Index++);
+  for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+    ExternalSymbolData[i].SymbolData->setIndex(Index++);
+  for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+    UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+  // The string table is padded to a multiple of 4.
+  while (StringTable.size() % 4)
+    StringTable += '\x00';
+}
 
-        Write32(Asm.getSymbolData(*it->Symbol).getIndex());
-      }
+void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
+                                               const MCAsmLayout &Layout) {
+  uint64_t StartAddress = 0;
+  const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+  for (int i = 0, n = Order.size(); i != n ; ++i) {
+    const MCSectionData *SD = Order[i];
+    StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+    SectionAddress[SD] = StartAddress;
+    StartAddress += Layout.getSectionAddressSize(SD);
+
+    // Explicitly pad the section to match the alignment requirements of the
+    // following one. This is for 'gas' compatibility, it shouldn't
+    /// strictly be necessary.
+    StartAddress += getPaddingSize(SD, Layout);
+  }
+}
 
-      // FIXME: Check that offsets match computed ones.
+void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+                                                const MCAsmLayout &Layout) {
+  computeSectionAddresses(Asm, Layout);
 
-      // Write the symbol table entries.
-      for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
-        WriteNlist(LocalSymbolData[i], Layout);
-      for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
-        WriteNlist(ExternalSymbolData[i], Layout);
-      for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
-        WriteNlist(UndefinedSymbolData[i], Layout);
+  // Create symbol data for any indirect symbols.
+  BindIndirectSymbols(Asm);
 
-      // Write the string table.
-      OS << StringTable.str();
+  // Compute symbol table information and bind symbol indices.
+  ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+                     UndefinedSymbolData);
+}
+
+bool MachObjectWriter::
+IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                       const MCSymbolData &DataA,
+                                       const MCFragment &FB,
+                                       bool InSet,
+                                       bool IsPCRel) const {
+  if (InSet)
+    return true;
+
+  // The effective address is
+  //     addr(atom(A)) + offset(A)
+  //   - addr(atom(B)) - offset(B)
+  // and the offsets are not relocatable, so the fixup is fully resolved when
+  //  addr(atom(A)) - addr(atom(B)) == 0.
+  const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+  const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+  const MCSection &SecA = SA.getSection();
+  const MCSection &SecB = FB.getParent()->getSection();
+
+  if (IsPCRel) {
+    // The simple (Darwin, except on x86_64) way of dealing with this was to
+    // assume that any reference to a temporary symbol *must* be a temporary
+    // symbol in the same atom, unless the sections differ. Therefore, any PCrel
+    // relocation to a temporary symbol (in the same section) is fully
+    // resolved. This also works in conjunction with absolutized .set, which
+    // requires the compiler to use .set to absolutize the differences between
+    // symbols which the compiler knows to be assembly time constants, so we
+    // don't need to worry about considering symbol differences fully resolved.
+
+    if (!Asm.getBackend().hasReliableSymbolDifference()) {
+      if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+        return false;
+      return true;
     }
+  } else {
+    if (!TargetObjectWriter->useAggressiveSymbolFolding())
+      return false;
+  }
+
+  const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
+
+  A_Base = FA.getAtom();
+  if (!A_Base)
+    return false;
+
+  B_Base = FB.getAtom();
+  if (!B_Base)
+    return false;
+
+  // If the atoms are the same, they are guaranteed to have the same address.
+  if (A_Base == B_Base)
+    return true;
+
+  // Otherwise, we can't prove this is fully resolved.
+  return false;
+}
+
+void MachObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
+  unsigned NumSections = Asm.size();
+
+  // The section data starts after the header, the segment load command (and
+  // section headers) and the symbol table.
+  unsigned NumLoadCommands = 1;
+  uint64_t LoadCommandsSize = is64Bit() ?
+    macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+    macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+
+  // Add the symbol table load command sizes, if used.
+  unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
+    UndefinedSymbolData.size();
+  if (NumSymbols) {
+    NumLoadCommands += 2;
+    LoadCommandsSize += (macho::SymtabLoadCommandSize +
+                         macho::DysymtabLoadCommandSize);
   }
-};
 
+  // Compute the total size of the section data, as well as its file size and vm
+  // size.
+  uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
+                               macho::Header32Size) + LoadCommandsSize;
+  uint64_t SectionDataSize = 0;
+  uint64_t SectionDataFileSize = 0;
+  uint64_t VMSize = 0;
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionData &SD = *it;
+    uint64_t Address = getSectionAddress(&SD);
+    uint64_t Size = Layout.getSectionAddressSize(&SD);
+    uint64_t FileSize = Layout.getSectionFileSize(&SD);
+    FileSize += getPaddingSize(&SD, Layout);
+
+    VMSize = std::max(VMSize, Address + Size);
+
+    if (SD.getSection().isVirtualSection())
+      continue;
+
+    SectionDataSize = std::max(SectionDataSize, Address + Size);
+    SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
+  }
+
+  // The section data is padded to 4 bytes.
+  //
+  // FIXME: Is this machine dependent?
+  unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+  SectionDataFileSize += SectionDataPadding;
+
+  // Write the prolog, starting with the header and load command...
+  WriteHeader(NumLoadCommands, LoadCommandsSize,
+              Asm.getSubsectionsViaSymbols());
+  WriteSegmentLoadCommand(NumSections, VMSize,
+                          SectionDataStart, SectionDataSize);
+
+  // ... and then the section headers.
+  uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+    unsigned NumRelocs = Relocs.size();
+    uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+    WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+    RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+  }
+
+  // Write the symbol table load command, if used.
+  if (NumSymbols) {
+    unsigned FirstLocalSymbol = 0;
+    unsigned NumLocalSymbols = LocalSymbolData.size();
+    unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+    unsigned NumExternalSymbols = ExternalSymbolData.size();
+    unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+    unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+    unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+    unsigned NumSymTabSymbols =
+      NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+    uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+    uint64_t IndirectSymbolOffset = 0;
+
+    // If used, the indirect symbols are written after the section data.
+    if (NumIndirectSymbols)
+      IndirectSymbolOffset = RelocTableEnd;
+
+    // The symbol table is written after the indirect symbol data.
+    uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+    // The string table is written after symbol table.
+    uint64_t StringTableOffset =
+      SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+                                              macho::Nlist32Size);
+    WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+                           StringTableOffset, StringTable.size());
+
+    WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+                             FirstExternalSymbol, NumExternalSymbols,
+                             FirstUndefinedSymbol, NumUndefinedSymbols,
+                             IndirectSymbolOffset, NumIndirectSymbols);
+  }
+
+  // Write the actual section data.
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    Asm.WriteSectionData(it, Layout);
+
+    uint64_t Pad = getPaddingSize(it, Layout);
+    for (unsigned int i = 0; i < Pad; ++i)
+      Write8(0);
+  }
+
+  // Write the extra padding.
+  WriteZeros(SectionDataPadding);
+
+  // Write the relocation entries.
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    // Write the section relocation entries, in reverse order to match 'as'
+    // (approximately, the exact algorithm is more complicated than this).
+    std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+    for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+      Write32(Relocs[e - i - 1].Word0);
+      Write32(Relocs[e - i - 1].Word1);
+    }
+  }
+
+  // Write the symbol table data, if used.
+  if (NumSymbols) {
+    // Write the indirect symbol entries.
+    for (MCAssembler::const_indirect_symbol_iterator
+           it = Asm.indirect_symbol_begin(),
+           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+      // Indirect symbols in the non lazy symbol pointer section have some
+      // special handling.
+      const MCSectionMachO &Section =
+        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+      if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+        // If this symbol is defined and internal, mark it as such.
+        if (it->Symbol->isDefined() &&
+            !Asm.getSymbolData(*it->Symbol).isExternal()) {
+          uint32_t Flags = macho::ISF_Local;
+          if (it->Symbol->isAbsolute())
+            Flags |= macho::ISF_Absolute;
+          Write32(Flags);
+          continue;
+        }
+      }
+
+      Write32(Asm.getSymbolData(*it->Symbol).getIndex());
+    }
+
+    // FIXME: Check that offsets match computed ones.
+
+    // Write the symbol table entries.
+    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+      WriteNlist(LocalSymbolData[i], Layout);
+    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+      WriteNlist(ExternalSymbolData[i], Layout);
+    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+      WriteNlist(UndefinedSymbolData[i], Layout);
+
+    // Write the string table.
+    OS << StringTable.str();
+  }
 }
 
 MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 493f708..81382d0 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -529,8 +529,8 @@ ConstantRange::sub(const ConstantRange &Other) const {
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
 
   APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
-  APInt NewLower = getLower() - Other.getLower();
-  APInt NewUpper = getUpper() - Other.getUpper() + 1;
+  APInt NewLower = getLower() - Other.getUpper() + 1;
+  APInt NewUpper = getUpper() - Other.getLower();
   if (NewLower == NewUpper)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
 
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 4679f74..8f77b04 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -16,6 +16,7 @@
 #define TARGET_ARM_H
 
 #include "ARMBaseInfo.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cassert>
@@ -27,6 +28,7 @@ class FunctionPass;
 class JITCodeEmitter;
 class formatted_raw_ostream;
 class MCCodeEmitter;
+class MCObjectWriter;
 class TargetAsmBackend;
 class MachineInstr;
 class ARMAsmPrinter;
@@ -58,6 +60,12 @@ extern Target TheARMTarget, TheThumbTarget;
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
 
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+                                          bool Is64Bit,
+                                          uint32_t CPUType,
+                                          uint32_t CPUSubtype);
+
 } // end namespace llvm;
 
 #endif
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp
index db132da..79e9897 100644
--- a/lib/Target/ARM/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/ARMAsmBackend.cpp
@@ -28,14 +28,6 @@
 using namespace llvm;
 
 namespace {
-class ARMMachObjectWriter : public MCMachObjectTargetWriter {
-public:
-  ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
-                      uint32_t CPUSubtype)
-    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
-                               /*UseAggressiveSymbolFolding=*/true) {}
-};
-
 class ARMELFObjectWriter : public MCELFObjectTargetWriter {
 public:
   ARMELFObjectWriter(Triple::OSType OSType)
@@ -423,12 +415,9 @@ public:
     : ARMAsmBackend(T), Subtype(st) { }
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return createMachObjectWriter(new ARMMachObjectWriter(
-                                    /*Is64Bit=*/false,
-                                    object::mach::CTM_ARM,
-                                    Subtype),
-                                  OS,
-                                  /*IsLittleEndian=*/true);
+    return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+                                     object::mach::CTM_ARM,
+                                     Subtype);
   }
 
   void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 2adcd2c..9dc51b8 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -100,6 +100,12 @@ getReservedRegs(const MachineFunction &MF) const {
   // Some targets reserve R9.
   if (STI.isR9Reserved())
     Reserved.set(ARM::R9);
+  // Reserve D16-D31 if the subtarget doesn't support them.
+  if (!STI.hasVFP3() || STI.hasD16()) {
+    assert(ARM::D31 == ARM::D16 + 15);
+    for (unsigned i = 0; i != 16; ++i)
+      Reserved.set(ARM::D16 + i);
+  }
   return Reserved;
 }
 
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 7c44c10..4ae4af1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -5587,7 +5587,6 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
   // Create VPADDL node.
   SelectionDAG &DAG = DCI.DAG;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  DebugLoc DL = N->getDebugLoc();
 
   // Build operand list.
   SmallVector<SDValue, 8> Ops;
@@ -7380,9 +7379,12 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
     case 'l': return C_RegisterClass;
     case 'w': return C_RegisterClass;
     }
-  } else {
-    if (Constraint == "Uv")
-      return C_Memory;
+  } else if (Constraint.size() == 2) {
+    switch (Constraint[0]) {
+    default: break;
+    // All 'U+' constraints are addresses.
+    case 'U': return C_Memory;
+    }
   }
   return TargetLowering::getConstraintType(Constraint);
 }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 2537fc3..5c013de 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -3008,35 +3008,22 @@ def REV  : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
               IIC_iUNAr, "rev", "\t$Rd, $Rm",
               [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
 
+let AddedComplexity = 5 in
 def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
                IIC_iUNAr, "rev16", "\t$Rd, $Rm",
-               [(set GPR:$Rd,
-                   (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
-                       (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
-                           (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
-                               (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+               [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
                Requires<[IsARM, HasV6]>;
 
+let AddedComplexity = 5 in
 def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
                IIC_iUNAr, "revsh", "\t$Rd, $Rm",
-               [(set GPR:$Rd,
-                  (sext_inreg
-                    (or (srl GPR:$Rm, (i32 8)),
-                        (shl GPR:$Rm, (i32 8))), i16))]>,
+               [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
                Requires<[IsARM, HasV6]>;
 
-def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
-                               (shl GPR:$Rm, (i32 8))), i16),
-               (REVSH GPR:$Rm)>;
-
 def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
                    (and (srl GPR:$Rm, (i32 8)), 0xFF)),
                (REVSH GPR:$Rm)>;
 
-// Need the AddedComplexity or else MOVs + REV would be chosen.
-let AddedComplexity = 5 in
-def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>;
-
 def lsl_shift_imm : SDNodeXForm<imm, [{
   unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
   return CurDAG->getTargetConstant(Sh, MVT::i32);
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 8430aa3..44fbc02 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1176,31 +1176,16 @@ def tREV16 :                    // A8.6.135
   T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
                  IIC_iUNAr,
                  "rev16", "\t$Rd, $Rm",
-             [(set tGPR:$Rd,
-                   (or (and (srl tGPR:$Rm, (i32 8)), 0xFF),
-                       (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00),
-                           (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000),
-                               (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+             [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
                 Requires<[IsThumb, IsThumb1Only, HasV6]>;
 
 def tREVSH :                    // A8.6.136
   T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
                  IIC_iUNAr,
                  "revsh", "\t$Rd, $Rm",
-                 [(set tGPR:$Rd,
-                       (sext_inreg
-                         (or (srl tGPR:$Rm, (i32 8)),
-                             (shl tGPR:$Rm, (i32 8))), i16))]>,
+                 [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
                  Requires<[IsThumb, IsThumb1Only, HasV6]>;
 
-def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
-                            (shl tGPR:$Rm, (i32 8))), i16),
-            (tREVSH tGPR:$Rm)>,
-      Requires<[IsThumb, IsThumb1Only, HasV6]>;
-
-def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>,
-      Requires<[IsThumb, IsThumb1Only, HasV6]>;
-
 // Rotate right register
 def tROR :                      // A8.6.139
   T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 53b9cec..090670b 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1206,29 +1206,6 @@ def t2SUBrSPs   : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm),
 }
 } // end isCodeGenOnly = 1
 
-// Signed and unsigned division on v7-M
-def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
-                 "sdiv", "\t$Rd, $Rn, $Rm",
-                 [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
-                 Requires<[HasDivide, IsThumb2]> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-21} = 0b011100;
-  let Inst{20} = 0b1;
-  let Inst{15-12} = 0b1111;
-  let Inst{7-4} = 0b1111;
-}
-
-def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
-                 "udiv", "\t$Rd, $Rn, $Rm",
-                 [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
-                 Requires<[HasDivide, IsThumb2]> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-21} = 0b011101;
-  let Inst{20} = 0b1;
-  let Inst{15-12} = 0b1111;
-  let Inst{7-4} = 0b1111;
-}
-
 //===----------------------------------------------------------------------===//
 //  Load / store Instructions.
 //
@@ -2560,6 +2537,32 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
                         "\t$Ra, $Rd, $Rm, $Rn", []>;
 
 //===----------------------------------------------------------------------===//
+//  Division Instructions.
+//  Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+                 "sdiv", "\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+                 Requires<[HasDivide, IsThumb2]> {
+  let Inst{31-27} = 0b11111;
+  let Inst{26-21} = 0b011100;
+  let Inst{20} = 0b1;
+  let Inst{15-12} = 0b1111;
+  let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+                 "udiv", "\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+                 Requires<[HasDivide, IsThumb2]> {
+  let Inst{31-27} = 0b11111;
+  let Inst{26-21} = 0b011101;
+  let Inst{20} = 0b1;
+  let Inst{15-12} = 0b1111;
+  let Inst{7-4} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
 //  Misc. Arithmetic Instructions.
 //
 
@@ -2587,29 +2590,16 @@ def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
 
 def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
                        "rev16", ".w\t$Rd, $Rm",
-                [(set rGPR:$Rd,
-                    (or (and (srl rGPR:$Rm, (i32 8)), 0xFF),
-                        (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00),
-                            (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000),
-                               (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>;
+                [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
 
 def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
                        "revsh", ".w\t$Rd, $Rm",
-                 [(set rGPR:$Rd,
-                    (sext_inreg
-                      (or (srl rGPR:$Rm, (i32 8)),
-                          (shl rGPR:$Rm, (i32 8))), i16))]>;
-
-def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
-                            (shl rGPR:$Rm, (i32 8))), i16),
-            (t2REVSH rGPR:$Rm)>;
+                 [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
 
 def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
-                   (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
+                (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
             (t2REVSH rGPR:$Rm)>;
 
-def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>;
-
 def t2PKHBT : T2ThreeReg<
             (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
                   IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
diff --git a/lib/Target/ARM/ARMMachObjectWriter.cpp b/lib/Target/ARM/ARMMachObjectWriter.cpp
new file mode 100644
index 0000000..4c35d0b
--- /dev/null
+++ b/lib/Target/ARM/ARMMachObjectWriter.cpp
@@ -0,0 +1,32 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+using namespace llvm;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+  ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+                      uint32_t CPUSubtype)
+    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+                               /*UseAggressiveSymbolFolding=*/true) {}
+};
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
+                                                bool Is64Bit,
+                                                uint32_t CPUType,
+                                                uint32_t CPUSubtype) {
+  return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+                                                        CPUType,
+                                                        CPUSubtype),
+                                OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index f4fbae3..7741410 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -202,42 +202,14 @@ def FPEXC   : ARMReg<8, "fpexc">;
 //
 def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
                                                SP, LR, PC)> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned ARM_GPR_AO[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R12,ARM::LR,
-      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
-      ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
-
-    // For Thumb1 mode, we don't want to allocate hi regs at all, as we
-    // don't know how to spill them. If we make our prologue/epilogue code
-    // smarter at some point, we can go back to using the above allocation
-    // orders for the Thumb1 instructions that know how to use hi regs.
-    static const unsigned THUMB_GPR_AO[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
-
-    GPRClass::iterator
-    GPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.isThumb1Only())
-        return THUMB_GPR_AO;
-      return ARM_GPR_AO;
-    }
-
-    GPRClass::iterator
-    GPRClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.isThumb1Only())
-        return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
-      return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
-    }
+  // Allocate LR as the first CSR since it is always saved anyway.
+  // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+  // know how to spill them. If we make our prologue/epilogue code smarter at
+  // some point, we can go back to using the above allocation orders for the
+  // Thumb1 instructions that know how to use hi regs.
+  let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+  let AltOrderSelect = [{
+      return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
   }];
 }
 
@@ -246,44 +218,9 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
 // or SP (R13 or R15) are used. The ARM ISA refers to these operands
 // via the BadReg() pseudo-code description.
 def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned ARM_rGPR_AO[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R12,ARM::LR,
-      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
-      ARM::R8, ARM::R9, ARM::R10,
-      ARM::R11 };
-
-    // For Thumb1 mode, we don't want to allocate hi regs at all, as we
-    // don't know how to spill them. If we make our prologue/epilogue code
-    // smarter at some point, we can go back to using the above allocation
-    // orders for the Thumb1 instructions that know how to use hi regs.
-    static const unsigned THUMB_rGPR_AO[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
-
-    rGPRClass::iterator
-    rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.isThumb1Only())
-        return THUMB_rGPR_AO;
-      return ARM_rGPR_AO;
-    }
-
-    rGPRClass::iterator
-    rGPRClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-
-      if (Subtarget.isThumb1Only())
-        return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
-      return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
-    }
+  let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+  let AltOrderSelect = [{
+      return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
   }];
 }
 
@@ -296,52 +233,12 @@ def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
 // Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
 // this class and the preceding one(!)  This is what we want.
 def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // R9 is available.
-    static const unsigned ARM_GPR_R9_TC[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R9, ARM::R12 };
-    // R9 is not available.
-    static const unsigned ARM_GPR_NOR9_TC[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R12 };
-
-    // For Thumb1 mode, we don't want to allocate hi regs at all, as we
-    // don't know how to spill them. If we make our prologue/epilogue code
-    // smarter at some point, we can go back to using the above allocation
-    // orders for the Thumb1 instructions that know how to use hi regs.
-    static const unsigned THUMB_GPR_AO_TC[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
-    tcGPRClass::iterator
-    tcGPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.isThumb1Only())
-        return THUMB_GPR_AO_TC;
-      return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
-    }
-
-    tcGPRClass::iterator
-    tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-
-      if (Subtarget.isThumb1Only())
-        return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
-
-      return Subtarget.isTargetDarwin() ?
-        ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
-        ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
-    }
+  let AltOrders = [(and tcGPR, tGPR)];
+  let AltOrderSelect = [{
+      return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
   }];
 }
 
-
 // Scalar single precision floating point register class..
 def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
 
@@ -355,48 +252,9 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
 // is double-word alignment though.
 def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
                         (sequence "D%u", 0, 31)> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // VFP2 / VFPv3-D16
-    static const unsigned ARM_DPR_VFP2[] = {
-      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
-      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
-      ARM::D8,  ARM::D9,  ARM::D10, ARM::D11,
-      ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
-    // VFP3: D8-D15 are callee saved and should be allocated last.
-    // Save other low registers for use as DPR_VFP2 and DPR_8 classes.
-    static const unsigned ARM_DPR_VFP3[] = {
-      ARM::D16, ARM::D17, ARM::D18, ARM::D19,
-      ARM::D20, ARM::D21, ARM::D22, ARM::D23,
-      ARM::D24, ARM::D25, ARM::D26, ARM::D27,
-      ARM::D28, ARM::D29, ARM::D30, ARM::D31,
-      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
-      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
-      ARM::D8,  ARM::D9,  ARM::D10, ARM::D11,
-      ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
-
-    DPRClass::iterator
-    DPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.hasVFP3() && !Subtarget.hasD16())
-        return ARM_DPR_VFP3;
-      return ARM_DPR_VFP2;
-    }
-
-    DPRClass::iterator
-    DPRClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.hasVFP3() && !Subtarget.hasD16())
-        return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned));
-      else
-        return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned));
-    }
-  }];
+  // Allocate non-VFP2 registers D16-D31 first.
+  let AltOrders = [(rotl DPR, 16)];
+  let AltOrderSelect = [{ return 1; }];
 }
 
 // Subset of DPR that are accessible with VFP2 (and so that also have
@@ -417,29 +275,9 @@ def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
 def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
                         (sequence "Q%u", 0, 15)> {
   let SubRegClasses = [(DPR dsub_0, dsub_1)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // Q4-Q7 are callee saved and should be allocated last.
-    // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
-    static const unsigned ARM_QPR[] = {
-      ARM::Q8,  ARM::Q9,  ARM::Q10, ARM::Q11,
-      ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15,
-      ARM::Q0,  ARM::Q1,  ARM::Q2,  ARM::Q3,
-      ARM::Q4,  ARM::Q5,  ARM::Q6,  ARM::Q7 };
-
-    QPRClass::iterator
-    QPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      return ARM_QPR;
-    }
-
-    QPRClass::iterator
-    QPRClass::allocation_order_end(const MachineFunction &MF) const {
-      return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned));
-    }
-  }];
+  // Allocate non-VFP2 aliases Q8-Q15 first.
+  let AltOrders = [(rotl QPR, 8)];
+  let AltOrderSelect = [{ return 1; }];
 }
 
 // Subset of QPR that have 32-bit SPR subregs.
@@ -461,27 +299,9 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
 def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
   let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
                        (QPR qsub_0, qsub_1)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // QQ2-QQ3 are callee saved and should be allocated last.
-    // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
-    static const unsigned ARM_QQPR[] = {
-      ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7,
-      ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 };
-
-    QQPRClass::iterator
-    QQPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      return ARM_QQPR;
-    }
-
-    QQPRClass::iterator
-    QQPRClass::allocation_order_end(const MachineFunction &MF) const {
-      return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned));
-    }
-  }];
+  // Allocate non-VFP2 aliases first.
+  let AltOrders = [(rotl QQPR, 4)];
+  let AltOrderSelect = [{ return 1; }];
 }
 
 // Subset of QQPR that have 32-bit SPR subregs.
@@ -498,26 +318,9 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
   let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
                             dsub_4, dsub_5, dsub_6, dsub_7),
                        (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // QQQQ1 is callee saved and should be allocated last.
-    // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes.
-    static const unsigned ARM_QQQQPR[] = {
-      ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 };
-
-    QQQQPRClass::iterator
-    QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      return ARM_QQQQPR;
-    }
-
-    QQQQPRClass::iterator
-    QQQQPRClass::allocation_order_end(const MachineFunction &MF) const {
-      return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned));
-    }
-  }];
+  // Allocate non-VFP2 aliases first.
+  let AltOrders = [(rotl QQQQPR, 2)];
+  let AltOrderSelect = [{ return 1; }];
 }
 
 // Condition code registers.
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index d3b8b54..edc0054 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(ARMCodeGen
   ARMISelLowering.cpp
   ARMInstrInfo.cpp
   ARMJITInfo.cpp
+  ARMMachObjectWriter.cpp
   ARMMCCodeEmitter.cpp
   ARMMCExpr.cpp
   ARMLoadStoreOptimizer.cpp
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index 9e2f79f..0d502fd 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -254,17 +254,7 @@ def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
 let CopyCost = -1, Size = 8 in {
 def JustCC  : RegisterClass<"BF", [i32], 8, (add CC)>;
 def NotCC   : RegisterClass<"BF", [i32], 8, (add NCC)>;
-def AnyCC   : RegisterClass<"BF", [i32], 8, (add CC, NCC)> {
-  let MethodProtos = [{
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    AnyCCClass::iterator
-    AnyCCClass::allocation_order_end(const MachineFunction &MF) const {
-      return allocation_order_begin(MF)+1;
-    }
-  }];
-}
+def AnyCC   : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
 def StatBit : RegisterClass<"BF", [i1], 8,
     (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
 }
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 7c24037..ec4020e 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -205,6 +205,9 @@ namespace {
     std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
 
     void lowerIntrinsics(Function &F);
+    /// Prints the definition of the intrinsic function F. Supports the 
+    /// intrinsics which need to be explicitly defined in the CBackend.
+    void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
 
     void printModuleTypes(const TypeSymbolTable &ST);
     void printContainedStructs(const Type *Ty, std::set<const Type *> &);
@@ -1777,6 +1780,7 @@ bool CWriter::doInitialization(Module &M) {
   Out << "/* Provide Declarations */\n";
   Out << "#include <stdarg.h>\n";      // Varargs support
   Out << "#include <setjmp.h>\n";      // Unwind support
+  Out << "#include <limits.h>\n";      // With overflow intrinsics support.
   generateCompilerSpecificCode(Out, TD);
 
   // Provide a definition for `bool' if not compiling with a C++ compiler.
@@ -1855,29 +1859,46 @@ bool CWriter::doInitialization(Module &M) {
   Out << "float fmodf(float, float);\n";
   Out << "long double fmodl(long double, long double);\n";
 
+  // Store the intrinsics which will be declared/defined below.
+  SmallVector<const Function*, 8> intrinsicsToDefine;
+
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     // Don't print declarations for intrinsic functions.
-    if (!I->isIntrinsic() && I->getName() != "setjmp" &&
-        I->getName() != "longjmp" && I->getName() != "_setjmp") {
-      if (I->hasExternalWeakLinkage())
-        Out << "extern ";
-      printFunctionSignature(I, true);
-      if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
-        Out << " __ATTRIBUTE_WEAK__";
-      if (I->hasExternalWeakLinkage())
-        Out << " __EXTERNAL_WEAK__";
-      if (StaticCtors.count(I))
-        Out << " __ATTRIBUTE_CTOR__";
-      if (StaticDtors.count(I))
-        Out << " __ATTRIBUTE_DTOR__";
-      if (I->hasHiddenVisibility())
-        Out << " __HIDDEN__";
-
-      if (I->hasName() && I->getName()[0] == 1)
-        Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
+    // Store the used intrinsics, which need to be explicitly defined.
+    if (I->isIntrinsic()) {
+      switch (I->getIntrinsicID()) {
+        default:
+          break;
+        case Intrinsic::uadd_with_overflow:
+        case Intrinsic::sadd_with_overflow:
+          intrinsicsToDefine.push_back(I);
+          break;
+      }
+      continue;
+    }
+
+    if (I->getName() == "setjmp" ||
+        I->getName() == "longjmp" || I->getName() == "_setjmp")
+      continue;
+
+    if (I->hasExternalWeakLinkage())
+      Out << "extern ";
+    printFunctionSignature(I, true);
+    if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+      Out << " __ATTRIBUTE_WEAK__";
+    if (I->hasExternalWeakLinkage())
+      Out << " __EXTERNAL_WEAK__";
+    if (StaticCtors.count(I))
+      Out << " __ATTRIBUTE_CTOR__";
+    if (StaticDtors.count(I))
+      Out << " __ATTRIBUTE_DTOR__";
+    if (I->hasHiddenVisibility())
+      Out << " __HIDDEN__";
+
+    if (I->hasName() && I->getName()[0] == 1)
+      Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
 
-      Out << ";\n";
-    }
+    Out << ";\n";
   }
 
   // Output the global variable declarations
@@ -2012,6 +2033,14 @@ bool CWriter::doInitialization(Module &M) {
   Out << "return X <= Y ; }\n";
   Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
   Out << "return X >= Y ; }\n";
+
+  // Emit definitions of the intrinsics.
+  for (SmallVector<const Function*, 8>::const_iterator
+       I = intrinsicsToDefine.begin(),
+       E = intrinsicsToDefine.end(); I != E; ++I) {
+    printIntrinsicDefinition(**I, Out);
+  }
+
   return false;
 }
 
@@ -2786,6 +2815,101 @@ void CWriter::visitSelectInst(SelectInst &I) {
   Out << "))";
 }
 
+// Returns the macro name or value of the max or min of an integer type
+// (as defined in limits.h).
+static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
+                            raw_ostream &Out) {
+  const char* type;
+  const char* sprefix = "";
+
+  unsigned NumBits = Ty.getBitWidth();
+  if (NumBits <= 8) {
+    type = "CHAR";
+    sprefix = "S";
+  } else if (NumBits <= 16) {
+    type = "SHRT";
+  } else if (NumBits <= 32) {
+    type = "INT";
+  } else if (NumBits <= 64) {
+    type = "LLONG";
+  } else {
+    llvm_unreachable("Bit widths > 64 not implemented yet");
+  }
+
+  if (isSigned)
+    Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
+  else
+    Out << "U" << type << (isMax ? "_MAX" : "0");
+}
+
+static bool isSupportedIntegerSize(const IntegerType &T) {
+  return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
+         T.getBitWidth() == 32 || T.getBitWidth() == 64;
+}
+
+void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
+  const FunctionType *funT = F.getFunctionType();
+  const Type *retT = F.getReturnType();
+  const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+
+  assert(isSupportedIntegerSize(*elemT) &&
+         "CBackend does not support arbitrary size integers.");
+  assert(cast<StructType>(retT)->getElementType(0) == elemT &&
+         elemT == funT->getParamType(0) && funT->getNumParams() == 2);
+
+  switch (F.getIntrinsicID()) {
+  default:
+    llvm_unreachable("Unsupported Intrinsic.");
+  case Intrinsic::uadd_with_overflow:
+    // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
+    //   Rty r;
+    //   r.field0 = a + b;
+    //   r.field1 = (r.field0 < a);
+    //   return r;
+    // }
+    Out << "static inline ";
+    printType(Out, retT);
+    Out << GetValueName(&F);
+    Out << "(";
+    printSimpleType(Out, elemT, false);
+    Out << "a,";
+    printSimpleType(Out, elemT, false);
+    Out << "b) {\n  ";
+    printType(Out, retT);
+    Out << "r;\n";
+    Out << "  r.field0 = a + b;\n";
+    Out << "  r.field1 = (r.field0 < a);\n";
+    Out << "  return r;\n}\n";
+    break;
+    
+  case Intrinsic::sadd_with_overflow:            
+    // static inline Rty sadd_ixx(ixx a, ixx b) {
+    //   Rty r;
+    //   r.field1 = (b > 0 && a > XX_MAX - b) ||
+    //              (b < 0 && a < XX_MIN - b);
+    //   r.field0 = r.field1 ? 0 : a + b;
+    //   return r;
+    // }
+    Out << "static ";
+    printType(Out, retT);
+    Out << GetValueName(&F);
+    Out << "(";
+    printSimpleType(Out, elemT, true);
+    Out << "a,";
+    printSimpleType(Out, elemT, true);
+    Out << "b) {\n  ";
+    printType(Out, retT);
+    Out << "r;\n";
+    Out << "  r.field1 = (b > 0 && a > ";
+    printLimitValue(*elemT, true, true, Out);
+    Out << " - b) || (b < 0 && a < ";
+    printLimitValue(*elemT, true, false, Out);
+    Out << " - b);\n";
+    Out << "  r.field0 = r.field1 ? 0 : a + b;\n";
+    Out << "  return r;\n}\n";
+    break;
+  }
+}
 
 void CWriter::lowerIntrinsics(Function &F) {
   // This is used to keep track of intrinsics that get generated to a lowered
@@ -2816,6 +2940,8 @@ void CWriter::lowerIntrinsics(Function &F) {
           case Intrinsic::x86_sse2_cmp_sd:
           case Intrinsic::x86_sse2_cmp_pd:
           case Intrinsic::ppc_altivec_lvsl:
+          case Intrinsic::uadd_with_overflow:
+          case Intrinsic::sadd_with_overflow:
               // We directly implement these intrinsics
             break;
           default:
@@ -3109,6 +3235,14 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
     writeOperand(I.getArgOperand(0));
     Out << ")";
     return true;
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::sadd_with_overflow:
+    Out << GetValueName(I.getCalledFunction()) << "(";
+    writeOperand(I.getArgOperand(0));
+    Out << ", ";
+    writeOperand(I.getArgOperand(1));
+    Out << ")";
+    return true;
   }
 }
 
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 797cfd5..0d15514 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -989,12 +989,12 @@ void CppWriter::printVariableUses(const GlobalVariable *GV) {
   nl(Out);
   printType(GV->getType());
   if (GV->hasInitializer()) {
-    Constant *Init = GV->getInitializer();
+    const Constant *Init = GV->getInitializer();
     printType(Init->getType());
-    if (Function *F = dyn_cast<Function>(Init)) {
+    if (const Function *F = dyn_cast<Function>(Init)) {
       nl(Out)<< "/ Function Declarations"; nl(Out);
       printFunctionHead(F);
-    } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+    } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
       nl(Out) << "// Global Variable Declarations"; nl(Out);
       printVariableHead(gv);
       
@@ -1353,9 +1353,10 @@ void CppWriter::printInstruction(const Instruction *I,
     printEscapedString(phi->getName());
     Out << "\", " << bbname << ");";
     nl(Out);
-    for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
+    for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
       Out << iName << "->addIncoming("
-          << opNames[i] << ", " << opNames[i+1] << ");";
+          << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", "
+          << getOpName(phi->getIncomingBlock(i)) << ");";
       nl(Out);
     }
     break;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8caa7cd..6f69ba3 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -56,6 +56,9 @@ namespace {
     bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                          unsigned AsmVariant, const char *ExtraCode,
                          raw_ostream &O);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &O);
     void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
     void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
     void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
@@ -304,6 +307,19 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   return false;
 }
 
+bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                           unsigned OpNum, unsigned AsmVariant,
+                                           const char *ExtraCode,
+                                           raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+     return true; // Unknown modifier.
+   
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isReg() && "unexpected inline asm memory operand");
+  O << "0($" << MipsAsmPrinter::getRegisterName(MO.getReg()) << ")";
+  return false;
+}
+
 void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
                                   raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand(opNum);
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 57aeb1d..876f0fc 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -20,8 +20,8 @@ class CCIfSubtarget<string F, CCAction A>:
 // Only the return rules are defined here for O32. The rules for argument
 // passing are defined in MipsISelLowering.cpp.
 def RetCC_MipsO32 : CallingConv<[
-  // i32 are returned in registers V0, V1
-  CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+  // i32 are returned in registers V0, V1, A0, A1
+  CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
 
   // f32 are returned in registers F0, F2
   CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index d8a84ce..c35c852 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -94,6 +94,10 @@ private:
   inline SDValue getI32Imm(unsigned Imm) {
     return CurDAG->getTargetConstant(Imm, MVT::i32);
   }
+
+  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                            char ConstraintCode,
+                                            std::vector<SDValue> &OutOps);
 };
 
 }
@@ -462,6 +466,14 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
   return ResNode;
 }
 
+bool MipsDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+                             std::vector<SDValue> &OutOps) {
+  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+  OutOps.push_back(Op);
+  return false;
+}
+
 /// createMipsISelDag - This pass converts a legalized DAG into a
 /// MIPS-specific DAG, ready for instruction scheduling.
 FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c42054e..01624c5 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -59,6 +59,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::BuildPairF64:      return "MipsISD::BuildPairF64";
   case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
   case MipsISD::WrapperPIC:        return "MipsISD::WrapperPIC";
+  case MipsISD::DynAlloc:          return "MipsISD::DynAlloc";
   default:                         return NULL;
   }
 }
@@ -1189,9 +1190,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
 SDValue MipsTargetLowering::
 LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
 {
-  unsigned StackAlignment =
-    getTargetMachine().getFrameLowering()->getStackAlignment();
-  assert(StackAlignment >=
+  MachineFunction &MF = DAG.getMachineFunction();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+  assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
          cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
          "Cannot lower if the alignment of the allocated space is larger than \
           that of the stack.");
@@ -1211,24 +1213,14 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
   // must be placed in the stack pointer register.
   Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
                            SDValue());
-  // Retrieve updated $sp. There is a glue input to prevent instructions that
-  // clobber $sp from being inserted between copytoreg and copyfromreg.
-  SDValue NewSP = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32,
-                                     Chain.getValue(1));
-
-  // The stack space reserved by alloca is located right above the argument
-  // area. It is aligned on a boundary that is a multiple of StackAlignment.
-  MachineFunction &MF = DAG.getMachineFunction();
-  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-  unsigned SPOffset = (MipsFI->getMaxCallFrameSize() + StackAlignment - 1) /
-                      StackAlignment * StackAlignment;
-  SDValue AllocPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
-                                 DAG.getConstant(SPOffset, MVT::i32));
 
   // This node always has two return values: a new stack pointer
   // value and a chain
-  SDValue Ops[2] = { AllocPtr, NewSP.getValue(1) };
-  return DAG.getMergeValues(Ops, 2, dl);
+  SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+  SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
+  SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
+
+  return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
 }
 
 SDValue MipsTargetLowering::
@@ -1358,7 +1350,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
     // General Dynamic TLS Model
     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
-                                                 0, MipsII::MO_TLSGD);
+                                             0, MipsII::MO_TLSGD);
     SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
     SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
     SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
@@ -1370,36 +1362,36 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
     Args.push_back(Entry);
     std::pair<SDValue, SDValue> CallResult =
         LowerCallTo(DAG.getEntryNode(),
-                 (const Type *) Type::getInt32Ty(*DAG.getContext()),
-                 false, false, false, false,
-                 0, CallingConv::C, false, true,
-                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+                    (const Type *) Type::getInt32Ty(*DAG.getContext()),
+                    false, false, false, false, 0, CallingConv::C, false, true,
+                    DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
+                    dl);
 
     return CallResult.first;
-  } else {
-    SDValue Offset;
-    if (GV->isDeclaration()) {
-      // Initial Exec TLS Model
-      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
-                                              MipsII::MO_GOTTPREL);
-      Offset = DAG.getLoad(MVT::i32, dl,
-                                  DAG.getEntryNode(), TGA, MachinePointerInfo(),
-                                  false, false, 0);
-    } else {
-      // Local Exec TLS Model
-      SDVTList VTs = DAG.getVTList(MVT::i32);
-      SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
-                                              MipsII::MO_TPREL_HI);
-      SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
-                                              MipsII::MO_TPREL_LO);
-      SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
-      SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
-      Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
-    }
+  }
 
-    SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
-    return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+  SDValue Offset;
+  if (GV->isDeclaration()) {
+    // Initial Exec TLS Model
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                             MipsII::MO_GOTTPREL);
+    Offset = DAG.getLoad(MVT::i32, dl,
+                         DAG.getEntryNode(), TGA, MachinePointerInfo(),
+                         false, false, 0);
+  } else {
+    // Local Exec TLS Model
+    SDVTList VTs = DAG.getVTList(MVT::i32);
+    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                               MipsII::MO_TPREL_HI);
+    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                               MipsII::MO_TPREL_LO);
+    SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
+    SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
+    Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
   }
+
+  SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
+  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
 }
 
 SDValue MipsTargetLowering::
@@ -1770,6 +1762,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   if (IsPIC && !MipsFI->getGPFI())
     MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
 
+  // Get the frame index of the stack frame object that points to the location
+  // of dynamically allocated area on the stack.
+  int DynAllocFI = MipsFI->getDynAllocFI();
+
   // Update size of the maximum argument space.
   // For O32, a minimum of four words (16 bytes) of argument space is
   // allocated.
@@ -1781,14 +1777,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   if (MaxCallFrameSize < NextStackOffset) {
     MipsFI->setMaxCallFrameSize(NextStackOffset);
 
-    if (IsPIC) {
-      // $gp restore slot must be aligned.
-      unsigned StackAlignment = TFL->getStackAlignment();
-      NextStackOffset = (NextStackOffset + StackAlignment - 1) /
-                        StackAlignment * StackAlignment;
-      int GPFI = MipsFI->getGPFI();
-      MFI->setObjectOffset(GPFI, NextStackOffset);
-    }
+    // Set the offsets relative to $sp of the $gp restore slot and dynamically
+    // allocated stack space. These offsets must be aligned to a boundary
+    // determined by the stack alignment of the ABI.
+    unsigned StackAlignment = TFL->getStackAlignment();
+    NextStackOffset = (NextStackOffset + StackAlignment - 1) /
+                      StackAlignment * StackAlignment;
+
+    if (IsPIC)
+      MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
+
+    MFI->setObjectOffset(DynAllocFI, NextStackOffset);
   }
 
   // With EABI is it possible to have 16 args on registers.
@@ -1965,7 +1964,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   InFlag = Chain.getValue(1);
 
   // Create the CALLSEQ_END node.
-  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NextStackOffset, true),
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getIntPtrConstant(NextStackOffset, true),
                              DAG.getIntPtrConstant(0, true), InFlag);
   InFlag = Chain.getValue(1);
 
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index fbcedfd..b7b85fd 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -79,7 +79,9 @@ namespace llvm {
       BuildPairF64,
       ExtractElementF64,
 
-      WrapperPIC
+      WrapperPIC,
+
+      DynAlloc
     };
   }
 
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 329a002..0651322 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -39,6 +39,9 @@ def SDT_MipsDivRem       : SDTypeProfile<0, 2,
 
 def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
 
+def SDT_MipsDynAlloc    : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+                                               SDTCisVT<1, iPTR>]>;
+
 // Call
 def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
                          [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
@@ -99,6 +102,10 @@ def MipsDivRemU   : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
 
 def MipsWrapperPIC    : SDNode<"MipsISD::WrapperPIC",  SDTIntUnaryOp>;
 
+// Pointer to dynamically allocated stack area.
+def MipsDynAlloc  : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
+                           [SDNPHasChain, SDNPInGlue]>;
+
 //===----------------------------------------------------------------------===//
 // Mips Instruction Predicate Definitions.
 //===----------------------------------------------------------------------===//
@@ -675,6 +682,12 @@ let addr=0 in
 // can be matched. It's similar to Sparc LEA_ADDRi
 def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
 
+// DynAlloc node points to dynamically allocated stack space.
+// $sp is added to the list of implicitly used registers to prevent dead code
+// elimination from removing instructions that modify $sp.
+let Uses = [SP] in
+def DynAlloc : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
+
 // MADD*/MSUB*
 def MADD  : MArithR<0, "madd", MipsMAdd, 1>;
 def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
@@ -852,6 +865,9 @@ def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
 def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
           (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
 
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+
 //===----------------------------------------------------------------------===//
 // Floating Point Support
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index df40e6c..dbb7a67 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -27,6 +27,7 @@ namespace llvm {
 class MipsFunctionInfo : public MachineFunctionInfo {
 
 private:
+  MachineFunction& MF;
   /// SRetReturnReg - Some subtargets require that sret lowering includes
   /// returning the value of the returned struct in a register. This field
   /// holds the virtual register into which the sret argument is passed.
@@ -47,6 +48,7 @@ private:
   //                LowerCall except for the frame object for restoring $gp. 
   std::pair<int, int> InArgFIRange, OutArgFIRange;
   int GPFI; // Index of the frame object for restoring $gp 
+  mutable int DynAllocFI; // Frame index of dynamically allocated stack area.   
   unsigned MaxCallFrameSize;
 
   /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
@@ -55,10 +57,10 @@ private:
   int AtomicFrameIndex;
 public:
   MipsFunctionInfo(MachineFunction& MF)
-  : SRetReturnReg(0), GlobalBaseReg(0),
+  : MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
     VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
-    OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0),
-    AtomicFrameIndex(-1)
+    OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
+    MaxCallFrameSize(0), AtomicFrameIndex(-1)
   {}
 
   bool isInArgFI(int FI) const {
@@ -81,6 +83,16 @@ public:
   bool needGPSaveRestore() const { return getGPFI(); }
   bool isGPFI(int FI) const { return GPFI && GPFI == FI; }
 
+  // The first call to this function creates a frame object for dynamically
+  // allocated stack area.
+  int getDynAllocFI() const {
+    if (!DynAllocFI)
+      DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
+
+    return DynAllocFI;
+  }
+  bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
+
   unsigned getSRetReturnReg() const { return SRetReturnReg; }
   void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
 
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index b0984af..fa64f63 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -179,12 +179,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
   int Offset;
 
   // Calculate final offset.
-  // - There is no need to change the offset if the frame object is an outgoing
-  //   argument or a $gp restore location,
+  // - There is no need to change the offset if the frame object is one of the
+  //   following: an outgoing argument, pointer to a dynamically allocated
+  //   stack space or a $gp restore location,
   // - If the frame object is any of the following, its offset must be adjusted
   //   by adding the size of the stack:
   //   incoming argument, callee-saved register location or local variable.  
-  if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex))
+  if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
+      MipsFI->isDynAllocFI(FrameIndex))
     Offset = spOffset;
   else
     Offset = spOffset + stackSize;
@@ -213,7 +215,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
   //  3. Locations for callee-saved registers.
   // Everything else is referenced relative to whatever register 
   // getFrameRegister() returns.
-  if (MipsFI->isOutArgFI(FrameIndex) ||
+  if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
       (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
     FrameReg = Mips::SP;
   else
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 2c7bd3b..6a36b24 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -16,7 +16,7 @@
 include "llvm/Target/Target.td"
 
 //===----------------------------------------------------------------------===//
-// Subtarget Features.
+// Subtarget Features
 //===----------------------------------------------------------------------===//
 
 //===- Architectural Features ---------------------------------------------===//
@@ -57,7 +57,7 @@ def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
                                    [FeatureSM13]>;
 
 //===----------------------------------------------------------------------===//
-// PTX supported processors.
+// PTX supported processors
 //===----------------------------------------------------------------------===//
 
 class Proc<string Name, list<SubtargetFeature> Features>
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 1142144..b1f7c1e 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
@@ -162,6 +163,13 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
   OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
                                 (ST.supportsDouble() ? ""
                                                      : ", map_f64_to_f32")));
+  // .address_size directive is optional, but it must immediately follow
+  // the .target directive if present within a module
+  if (ST.supportsPTX23()) {
+    std::string addrSize = ST.is64Bit() ? "64" : "32";
+    OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+  }
+
   OutStreamer.AddBlankLine();
 
   // declare global variables
@@ -194,6 +202,21 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
     def += ';';
     OutStreamer.EmitRawText(Twine(def));
   }
+
+  const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+  DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
+               << " frame object(s)\n");
+  for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+    DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+    if (FrameInfo->getObjectSize(i) > 0) {
+      std::string def = "\t.reg .b";
+      def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
+      def += " s";
+      def += utostr(i);
+      def += ";";
+      OutStreamer.EmitRawText(Twine(def));
+    }
+  }
 }
 
 void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -346,7 +369,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
 
     if (gv->hasInitializer())
     {
-      Constant *C = gv->getInitializer();  
+      const Constant *C = gv->getInitializer();  
       if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
       {
         decl += " = {";
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index c305c05..5bdac89 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -288,6 +288,77 @@ InsertBranch(MachineBasicBlock &MBB,
   }
 }
 
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MII,
+                                     unsigned SrcReg, bool isKill, int FrameIdx,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  MachineInstr& MI = *MII;
+  DebugLoc DL = MI.getDebugLoc();
+
+  DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
+
+  int OpCode;
+
+  // Select the appropriate opcode based on the register class
+  if (RC == PTX::RegI16RegisterClass) {
+    OpCode = PTX::STACKSTOREI16;
+  }  else if (RC == PTX::RegI32RegisterClass) {
+    OpCode = PTX::STACKSTOREI32;
+  }  else if (RC == PTX::RegI64RegisterClass) {
+    OpCode = PTX::STACKSTOREI32;
+  }  else if (RC == PTX::RegF32RegisterClass) {
+    OpCode = PTX::STACKSTOREF32;
+  }  else if (RC == PTX::RegF64RegisterClass) {
+    OpCode = PTX::STACKSTOREF64;
+  } else {
+    llvm_unreachable("Unknown PTX register class!");
+  }
+
+  // Build the store instruction (really a mov)
+  MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+  MIB.addFrameIndex(FrameIdx);
+  MIB.addReg(SrcReg);
+
+  AddDefaultPredicate(MIB);
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MII,
+                                        unsigned DestReg, int FrameIdx,
+                                        const TargetRegisterClass *RC,
+                                        const TargetRegisterInfo *TRI) const {
+  MachineInstr& MI = *MII;
+  DebugLoc DL = MI.getDebugLoc();
+
+  DEBUG(dbgs() << "loadRegToStackSlot: " << MI);
+
+  int OpCode;
+
+  // Select the appropriate opcode based on the register class
+  if (RC == PTX::RegI16RegisterClass) {
+    OpCode = PTX::STACKLOADI16;
+  } else if (RC == PTX::RegI32RegisterClass) {
+    OpCode = PTX::STACKLOADI32;
+  } else if (RC == PTX::RegI64RegisterClass) {
+    OpCode = PTX::STACKLOADI32;
+  } else if (RC == PTX::RegF32RegisterClass) {
+    OpCode = PTX::STACKLOADF32;
+  } else if (RC == PTX::RegF64RegisterClass) {
+    OpCode = PTX::STACKLOADF64;
+  } else {
+    llvm_unreachable("Unknown PTX register class!");
+  }
+
+  // Build the load instruction (really a mov)
+  MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+  MIB.addReg(DestReg);
+  MIB.addFrameIndex(FrameIdx);
+
+  AddDefaultPredicate(MIB);
+}
+
 // static helper routines
 
 MachineSDNode *PTXInstrInfo::
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index a04be77..a2eea25 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -84,6 +84,29 @@ public:
                                 const SmallVectorImpl<MachineOperand> &Cond,
                                 DebugLoc DL) const;
 
+  // Memory operand folding for spills
+  // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+  //       loadRegFromStackSlot.  Doing so will get rid of the "stack" registers
+  //       we currently use to spill, though I doubt the overall effect on ptxas
+  //       output will be large.  I have yet to see a case where ptxas is unable
+  //       to see through the "stack" register usage and hence generates
+  //       efficient code anyway.
+  // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+  //                                             MachineInstr* MI,
+  //                                       const SmallVectorImpl<unsigned> &Ops,
+  //                                             int FrameIndex) const;
+
+  virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+                                   MachineBasicBlock::iterator MII,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass* RC,
+                                   const TargetRegisterInfo* TRI) const;
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MII,
+                                    unsigned DestReg, int FrameIdx,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
+
   // static helper routines
 
   static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 71f7cc3..cc74944 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -584,24 +584,39 @@ defm REM : INT3<"rem", urem>;
 defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
 
 // Standard Binary Operations
-defm FADD : PTX_FLOAT_3OP<"add", fadd>;
-defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
-defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
-
-// TODO: Allow user selection of rounding modes for fdiv.
-// For division, we need to have f32 and f64 differently.
-// For f32, we just always use .approx since it is supported on all hardware
-// for PTX 1.4+, which is our minimum target.
-def FDIVrr32 : InstPTX<(outs RegF32:$d),
+defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
+
+// For floating-point division:
+// SM_13+ defaults to .rn for f32 and f64,
+// SM10 must *not* provide a rounding
+
+// TODO: 
+//     - Allow user selection of rounding modes for fdiv
+//     - Add support for -prec-div=false (.approx)
+
+def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
+                       (ins RegF32:$a, RegF32:$b),
+                       "div.rn.f32\t$d, $a, $b",
+                       [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+                   Requires<[SupportsSM13]>;
+def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
+                       (ins RegF32:$a, f32imm:$b),
+                       "div.rn.f32\t$d, $a, $b",
+                       [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+                   Requires<[SupportsSM13]>;
+def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
                        (ins RegF32:$a, RegF32:$b),
-                       "div.approx.f32\t$d, $a, $b",
-                       [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>;
-def FDIVri32 : InstPTX<(outs RegF32:$d),
+                       "div.f32\t$d, $a, $b",
+                       [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+                   Requires<[DoesNotSupportSM13]>;
+def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
                        (ins RegF32:$a, f32imm:$b),
-                       "div.approx.f32\t$d, $a, $b",
-                       [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>;
+                       "div.f32\t$d, $a, $b",
+                       [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+                   Requires<[DoesNotSupportSM13]>;
 
-// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0.
 def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
                            (ins RegF64:$a, RegF64:$b),
                            "div.rn.f64\t$d, $a, $b",
@@ -681,6 +696,10 @@ defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
 defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
 defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
 defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT,  "lt">;
+defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE,  "le">;
+defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT,  "gt">;
+defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE,  "ge">;
 
 // Compare u32
 
@@ -690,6 +709,10 @@ defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
 defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
 defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
 defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
+defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT,  "lt">;
+defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE,  "le">;
+defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT,  "gt">;
+defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE,  "ge">;
 
 // Compare u64
 
@@ -699,6 +722,10 @@ defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
 defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
 defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
 defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
+defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT,  "lt">;
+defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE,  "le">;
+defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT,  "gt">;
+defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE,  "ge">;
 
 // Compare f32
 
@@ -811,31 +838,35 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
 // TODO: Do something with st.param if/when it is needed.
 
 // Conversion to pred
-
+// PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero.  This follows
+// the C convention that any non-zero value is equivalent to 'true'.
 def CVT_pred_u16
-  : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "cvt.pred.u16\t$d, $a",
+  : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.b16\t$d, $a, 0",
             [(set RegPred:$d, (trunc RegI16:$a))]>;
 
 def CVT_pred_u32
-  : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "cvt.pred.u32\t$d, $a",
+  : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.b32\t$d, $a, 0",
             [(set RegPred:$d, (trunc RegI32:$a))]>;
 
 def CVT_pred_u64
-  : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "cvt.pred.u64\t$d, $a",
+  : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.b64\t$d, $a, 0",
             [(set RegPred:$d, (trunc RegI64:$a))]>;
 
 def CVT_pred_f32
-  : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "cvt.rni.pred.f32\t$d, $a",
+  : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.b32\t$d, $a, 0",
             [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
 
 def CVT_pred_f64
-  : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "cvt.rni.pred.f64\t$d, $a",
+  : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.b64\t$d, $a, 0",
             [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
 
 // Conversion to u16
-
+// PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
 def CVT_u16_pred
-  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "cvt.u16.pred\t$d, $a",
+  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
             [(set RegI16:$d, (zext RegPred:$a))]>;
 
 def CVT_u16_u32
@@ -847,17 +878,17 @@ def CVT_u16_u64
             [(set RegI16:$d, (trunc RegI64:$a))]>;
 
 def CVT_u16_f32
-  : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rni.u16.f32\t$d, $a",
+  : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
             [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
 
 def CVT_u16_f64
-  : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rni.u16.f64\t$d, $a",
+  : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
             [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
 
 // Conversion to u32
 
 def CVT_u32_pred
-  : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "cvt.u32.pred\t$d, $a",
+  : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
             [(set RegI32:$d, (zext RegPred:$a))]>;
 
 def CVT_u32_u16
@@ -869,17 +900,17 @@ def CVT_u32_u64
             [(set RegI32:$d, (trunc RegI64:$a))]>;
 
 def CVT_u32_f32
-  : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rni.u32.f32\t$d, $a",
+  : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
             [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
 
 def CVT_u32_f64
-  : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rni.u32.f64\t$d, $a",
+  : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
             [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
 
 // Conversion to u64
 
 def CVT_u64_pred
-  : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "cvt.u64.pred\t$d, $a",
+  : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
             [(set RegI64:$d, (zext RegPred:$a))]>;
 
 def CVT_u64_u16
@@ -891,17 +922,18 @@ def CVT_u64_u32
             [(set RegI64:$d, (zext RegI32:$a))]>;
 
 def CVT_u64_f32
-  : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rni.u64.f32\t$d, $a",
+  : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
             [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
 
 def CVT_u64_f64
-  : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rni.u64.f64\t$d, $a",
+  : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
             [(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
 
 // Conversion to f32
 
 def CVT_f32_pred
-  : InstPTX<(outs RegF32:$d), (ins RegPred:$a), "cvt.rn.f32.pred\t$d, $a",
+  : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
+            "selp.f32\t$d, 0F3F800000, 0F00000000, $a",  // 1.0
             [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
 
 def CVT_f32_u16
@@ -923,7 +955,8 @@ def CVT_f32_f64
 // Conversion to f64
 
 def CVT_f64_pred
-  : InstPTX<(outs RegF64:$d), (ins RegPred:$a), "cvt.rn.f64.pred\t$d, $a",
+  : InstPTX<(outs RegF64:$d), (ins RegPred:$a), 
+            "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a",  // 1.0
             [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
 
 def CVT_f64_u16
@@ -962,6 +995,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
   def RET  : InstPTX<(outs), (ins), "ret",  [(PTXret)]>;
 }
 
+///===- Spill Instructions ------------------------------------------------===//
+// Special instructions used for stack spilling
+def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
+                            "mov.u16\ts$d, $a", []>;
+def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
+                            "mov.u32\ts$d, $a", []>;
+def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
+                            "mov.u64\ts$d, $a", []>;
+def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
+                            "mov.f32\ts$d, $a", []>;
+def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
+                            "mov.f64\ts$d, $a", []>;
+
+def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
+                           "mov.u16\t$d, s$a", []>;
+def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
+                           "mov.u32\t$d, s$a", []>;
+def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
+                           "mov.u64\t$d, s$a", []>;
+def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
+                           "mov.f32\t$d, s$a", []>;
+def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
+                           "mov.f64\t$d, s$a", []>;
+
 ///===- Intrinsic Instructions --------------------------------------------===//
 
 include "PTXIntrinsicInstrInfo.td"
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index 0f3e7bc..b7c7ee5 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -13,7 +13,34 @@
 
 #include "PTX.h"
 #include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 #include "PTXGenRegisterInfo.inc"
+
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                          int SPAdj,
+                                          RegScavenger *RS) const {
+  unsigned Index;
+  MachineInstr& MI = *II;
+
+  Index = 0;
+  while (!MI.getOperand(Index).isFI()) {
+    ++Index;
+    assert(Index < MI.getNumOperands() &&
+           "Instr does not have a FrameIndex operand!");
+  }
+
+  int FrameIndex = MI.getOperand(Index).getIndex();
+
+  DEBUG(dbgs() << "eliminateFrameIndex: " << MI);
+  DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
+  DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+
+  // This frame index is post stack slot re-use assignments
+  MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+}
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
index dc56352..223e965 100644
--- a/lib/Target/PTX/PTXRegisterInfo.h
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -38,11 +38,9 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo {
     return Reserved; // reserve no regs
   }
 
-  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
                                    int SPAdj,
-                                   RegScavenger *RS = NULL) const {
-    llvm_unreachable("PTX does not support general function call");
-  }
+                                   RegScavenger *RS = NULL) const;
 
   virtual unsigned getFrameRegister(const MachineFunction &MF) const {
     llvm_unreachable("PTX does not have a frame register");
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 511bb22..2176c02 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -610,6 +610,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   unsigned Imm;
   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+  bool isPPC64 = (PtrVT == MVT::i64);
+
   if (isInt32Immediate(N->getOperand(1), Imm)) {
     // We can codegen setcc op, imm very efficiently compared to a brcond.
     // Check for those cases here.
@@ -624,6 +627,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
         return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
       }
       case ISD::SETNE: {
+        if (isPPC64) break;
         SDValue AD =
           SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                          Op, getI32Imm(~0U)), 0);
@@ -647,6 +651,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
       switch (CC) {
       default: break;
       case ISD::SETEQ:
+        if (isPPC64) break;
         Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                             Op, getI32Imm(1)), 0);
         return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
@@ -655,6 +660,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
                                                              getI32Imm(0)), 0),
                                       Op.getValue(1));
       case ISD::SETNE: {
+        if (isPPC64) break;
         Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
         SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                             Op, getI32Imm(~0U));
@@ -996,22 +1002,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
   }
   case ISD::SELECT_CC: {
     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+    EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+    bool isPPC64 = (PtrVT == MVT::i64);
 
     // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
-    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-      if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
-        if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
-          if (N1C->isNullValue() && N3C->isNullValue() &&
-              N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
-              // FIXME: Implement this optzn for PPC64.
-              N->getValueType(0) == MVT::i32) {
-            SDNode *Tmp =
-              CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
-                                     N->getOperand(0), getI32Imm(~0U));
-            return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
-                                        SDValue(Tmp, 0), N->getOperand(0),
-                                        SDValue(Tmp, 1));
-          }
+    if (!isPPC64)
+      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+            if (N1C->isNullValue() && N3C->isNullValue() &&
+                N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+                // FIXME: Implement this optzn for PPC64.
+                N->getValueType(0) == MVT::i32) {
+              SDNode *Tmp =
+                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+                                       N->getOperand(0), getI32Imm(~0U));
+              return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+                                          SDValue(Tmp, 0), N->getOperand(0),
+                                          SDValue(Tmp, 1));
+            }
 
     SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
     unsigned BROpc = getPredicateForSetCC(CC);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 55c15ec..c9b490b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1870,7 +1870,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       InVals.push_back(FIN);
       if (ObjSize==1 || ObjSize==2) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+          unsigned VReg;
+          if (isPPC64)
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          else
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                             MachinePointerInfo(),
@@ -1889,7 +1893,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         // to memory.  ArgVal will be address of the beginning of
         // the object.
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+          unsigned VReg;
+          if (isPPC64)
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          else
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -4675,7 +4683,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
     .addReg(TmpReg).addReg(MaskReg);
   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
     .addReg(Tmp3Reg).addReg(Tmp2Reg);
-  BuildMI(BB, dl, TII->get(PPC::STWCX))
+  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
   BuildMI(BB, dl, TII->get(PPC::BCC))
     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3374e9b..fd62a88 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -504,6 +504,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
   const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
   unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
   unsigned SrcReg = MI.getOperand(0).getReg();
+  bool LP64 = Subtarget.isPPC64();
 
   // We need to store the CR in the low 4-bits of the saved value. First, issue
   // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg.
@@ -520,7 +521,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
       .addImm(0)
       .addImm(31);
 
-  addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+  addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
                     .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
                     FrameIndex);
 
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 3343384..130a553 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -43,6 +43,7 @@ TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
   StaticCtorSection = 0;
   StaticDtorSection = 0;
   LSDASection = 0;
+  CompactUnwindSection = 0;
 
   CommDirectiveSupportsAlignment = true;
   DwarfAbbrevSection = 0;
@@ -60,13 +61,14 @@ TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
   
   IsFunctionEHFrameSymbolPrivate = true;
   SupportsWeakOmittedEHFrame = true;
+  SupportsCompactUnwindInfo = false;
 }
 
 TargetLoweringObjectFile::~TargetLoweringObjectFile() {
 }
 
 static bool isSuitableForBSS(const GlobalVariable *GV) {
-  Constant *C = GV->getInitializer();
+  const Constant *C = GV->getInitializer();
 
   // Must have zero initializer.
   if (!C->isNullValue())
@@ -168,7 +170,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
     return SectionKind::getBSS();
   }
 
-  Constant *C = GVar->getInitializer();
+  const Constant *C = GVar->getInitializer();
 
   // If the global is marked constant, we can put it into a mergable section,
   // a mergable string section, or general .data if it contains relocations.
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index e36e136..bae3343 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -79,9 +79,9 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
 /// registers for the specific register class.
 static void getAllocatableSetForRC(const MachineFunction &MF,
                                    const TargetRegisterClass *RC, BitVector &R){
-  for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
-         E = RC->allocation_order_end(MF); I != E; ++I)
-    R.set(*I);
+  ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+  for (unsigned i = 0; i != Order.size(); ++i)
+    R.set(Order[i]);
 }
 
 BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1cdf2b6..6cd03d0 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12186,8 +12186,8 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
-static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) {
-  DebugLoc dl = N->getDebugLoc();
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+                                        const X86TargetLowering *XTLI) {
   SDValue Op0 = N->getOperand(0);
   // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
   // a 32-bit target where SSE doesn't support i64->FP operations.
@@ -12198,7 +12198,8 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86T
         ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
         !XTLI->getSubtarget()->is64Bit() &&
         !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
-      SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+      SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
+                                          Ld->getChain(), Op0, DAG);
       DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
       return FILDChain;
     }
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index b3237d5..aebf8dc 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2082,7 +2082,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   const MachineFunction &MF = *MBB.getParent();
   assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
          "Stack slot too small for store");
-  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+    RI.canRealignStack(MF);
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
   DebugLoc DL = MBB.findDebugLoc(MI);
   addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -2114,7 +2115,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
-  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+    RI.canRealignStack(MF);
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
   DebugLoc DL = MBB.findDebugLoc(MI);
   addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7774057..8377c3a 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2968,6 +2968,22 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
             (MOVZDI2PDIrm addr:$src)>;
 }
 
+// These are the correct encodings of the instructions so that we know how to
+// read correct assembly, even though we continue to emit the wrong ones for
+// compatibility with Darwin's buggy assembler.
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+
 //===---------------------------------------------------------------------===//
 // SSE2 - Move Quadword
 //===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1ad6203..fa3e3f8 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -60,7 +60,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
   const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
   Is64Bit = Subtarget->is64Bit();
   IsWin64 = Subtarget->isTargetWin64();
-  StackAlign = TM.getFrameLowering()->getStackAlignment();
 
   if (Is64Bit) {
     SlotSize = 8;
@@ -517,13 +516,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
   // Reserve the registers that only exist in 64-bit mode.
   if (!Is64Bit) {
+    // These 8-bit registers are part of the x86-64 extension even though their
+    // super-registers are old 32-bits.
+    Reserved.set(X86::SIL);
+    Reserved.set(X86::DIL);
+    Reserved.set(X86::BPL);
+    Reserved.set(X86::SPL);
+
     for (unsigned n = 0; n != 8; ++n) {
+      // R8, R9, ...
       const unsigned GPR64[] = {
         X86::R8,  X86::R9,  X86::R10, X86::R11,
         X86::R12, X86::R13, X86::R14, X86::R15
       };
-      for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI;
-           ++AI)
+      for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
         Reserved.set(Reg);
 
       // XMM8, XMM9, ...
@@ -550,6 +556,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
 bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const Function *F = MF.getFunction();
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
   bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
                                F->hasFnAttr(Attribute::StackAlignment));
 
@@ -625,6 +632,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
     // alignment boundary.
+    unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
     Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
 
     MachineInstr *New = 0;
@@ -920,10 +928,10 @@ namespace {
     virtual bool runOnMachineFunction(MachineFunction &MF) {
       const X86TargetMachine *TM =
         static_cast<const X86TargetMachine *>(&MF.getTarget());
-      const X86RegisterInfo *X86RI = TM->getRegisterInfo();
+      const TargetFrameLowering *TFI = TM->getFrameLowering();
       MachineRegisterInfo &RI = MF.getRegInfo();
       X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-      unsigned StackAlignment = X86RI->getStackAlignment();
+      unsigned StackAlignment = TFI->getStackAlignment();
 
       // Be over-conservative: scan over all vreg defs and find whether vector
       // registers are used. If yes, there is a possibility that vector register
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index dd3d3dc..9fd6ed5 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -56,10 +56,6 @@ private:
   ///
   unsigned SlotSize;
 
-  /// StackAlign - Default stack alignment.
-  ///
-  unsigned StackAlign;
-
   /// StackPtr - X86 physical register used as stack ptr.
   ///
   unsigned StackPtr;
@@ -75,8 +71,6 @@ public:
   /// register identifier.
   static unsigned getX86RegNum(unsigned RegNo);
 
-  unsigned getStackAlignment() const { return StackAlign; }
-
   /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
   /// (created by TableGen) for target dependencies.
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 590b38b..14d6d64 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -281,44 +281,9 @@ let Namespace = "X86" in {
 def GR8 : RegisterClass<"X86", [i8],  8,
                         (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
                              R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned X86_GR8_AO_64[] = {
-      X86::AL,   X86::CL,   X86::DL,   X86::SIL, X86::DIL,
-      X86::R8B,  X86::R9B,  X86::R10B, X86::R11B,
-      X86::BL,   X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
-    };
-
-    GR8Class::iterator
-    GR8Class::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return X86_GR8_AO_64;
-      else
-        return begin();
-    }
-
-    GR8Class::iterator
-    GR8Class::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
-      // Does the function dedicate RBP / EBP to being a frame ptr?
-      if (!Subtarget.is64Bit())
-        // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
-        return begin() + 8;
-      else if (TFI->hasFP(MF) || MFI->getReserveFP())
-        // If so, don't allocate SPL or BPL.
-        return array_endof(X86_GR8_AO_64) - 1;
-      else
-        // If not, just don't allocate SPL.
-        return array_endof(X86_GR8_AO_64);
-    }
+  let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+  let AltOrderSelect = [{
+    return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
   }];
 }
 
@@ -394,35 +359,9 @@ def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
 def GR8_NOREX : RegisterClass<"X86", [i8], 8,
                               (add AL, CL, DL, AH, CH, DH, BL, BH)> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // In 64-bit mode, it's not safe to blindly allocate H registers.
-    static const unsigned X86_GR8_NOREX_AO_64[] = {
-      X86::AL, X86::CL, X86::DL, X86::BL
-    };
-
-    GR8_NOREXClass::iterator
-    GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return X86_GR8_NOREX_AO_64;
-      else
-        return begin();
-    }
-
-    GR8_NOREXClass::iterator
-    GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return array_endof(X86_GR8_NOREX_AO_64);
-      else
-        return end();
-    }
+  let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+  let AltOrderSelect = [{
+    return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
   }];
 }
 // GR16_NOREX - GR16 registers which do not require a REX prefix.
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8f06dd3..6df8ce0 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -114,7 +114,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
   
   MCSymbol *GVSym = Mang->getSymbol(GV);
-  Constant *C = GV->getInitializer();
+  const Constant *C = GV->getInitializer();
   unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
   
   // Mark the start of the global
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index cdf7b76..4ac721d 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1999,9 +1999,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
 static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
                                           const std::vector<Function*> &Ctors) {
   // If we made a change, reassemble the initializer list.
-  std::vector<Constant*> CSVals;
-  CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
-  CSVals.push_back(0);
+  Constant *CSVals[2];
+  CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
+  CSVals[1] = 0;
+
+  const StructType *StructTy =
+    cast <StructType>(
+    cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
 
   // Create the new init list.
   std::vector<Constant*> CAList;
@@ -2016,12 +2020,10 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
       CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
                                    0x7fffffff);
     }
-    CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
+    CAList.push_back(ConstantStruct::get(StructTy, CSVals));
   }
 
   // Create the array initializer.
-  const Type *StructTy =
-      cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
   Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
                                                    CAList.size()), CAList);
 
@@ -2218,42 +2220,40 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
     Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
 
     // Return the modified struct.
-    return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
-                               STy->isPacked());
-  } else {
-    ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
-    const SequentialType *InitTy = cast<SequentialType>(Init->getType());
-
-    uint64_t NumElts;
-    if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
-      NumElts = ATy->getNumElements();
-    else
-      NumElts = cast<VectorType>(InitTy)->getNumElements();
-
+    return ConstantStruct::get(STy, Elts);
+  }
+  
+  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+  const SequentialType *InitTy = cast<SequentialType>(Init->getType());
 
-    // Break up the array into elements.
-    if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
-      for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
-        Elts.push_back(cast<Constant>(*i));
-    } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
-      for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
-        Elts.push_back(cast<Constant>(*i));
-    } else if (isa<ConstantAggregateZero>(Init)) {
-      Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
-    } else {
-      assert(isa<UndefValue>(Init) && "This code is out of sync with "
-             " ConstantFoldLoadThroughGEPConstantExpr");
-      Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
-    }
+  uint64_t NumElts;
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+    NumElts = ATy->getNumElements();
+  else
+    NumElts = cast<VectorType>(InitTy)->getNumElements();
+
+  // Break up the array into elements.
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+    for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+      Elts.push_back(cast<Constant>(*i));
+  } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
+    for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
+      Elts.push_back(cast<Constant>(*i));
+  } else if (isa<ConstantAggregateZero>(Init)) {
+    Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
+  } else {
+    assert(isa<UndefValue>(Init) && "This code is out of sync with "
+           " ConstantFoldLoadThroughGEPConstantExpr");
+    Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
+  }
 
-    assert(CI->getZExtValue() < NumElts);
-    Elts[CI->getZExtValue()] =
-      EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+  assert(CI->getZExtValue() < NumElts);
+  Elts[CI->getZExtValue()] =
+    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
 
-    if (Init->getType()->isArrayTy())
-      return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
-    return ConstantVector::get(Elts);
-  }
+  if (Init->getType()->isArrayTy())
+    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+  return ConstantVector::get(Elts);
 }
 
 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ef67701..455cd0a 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -412,7 +412,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(LHS->getType()),
           ConstantInt::getTrue(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        const StructType *ST = cast<StructType>(II->getType());
+        Constant *Struct = ConstantStruct::get(ST, V);
         return InsertValueInst::Create(Struct, Add, 0);
       }
 
@@ -425,7 +426,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(LHS->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        const StructType *ST = cast<StructType>(II->getType());
+        Constant *Struct = ConstantStruct::get(ST, V);
         return InsertValueInst::Create(Struct, Add, 0);
       }
     }
@@ -452,7 +454,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct =
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
@@ -472,7 +475,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct = 
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
@@ -503,7 +507,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         UndefValue::get(LHS->getType()),
         Builder->getFalse()
       };
-      Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
       return InsertValueInst::Create(Struct, Mul, 0);
     }
   } // FALL THROUGH
@@ -532,7 +536,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct = 
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index b902213..07d69e8 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -561,11 +561,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
     Edge += Successors;
   }
 
+  ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size());
   GlobalVariable *EdgeTableGV =
       new GlobalVariable(
           *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
-          ConstantArray::get(EdgeTableTy,
-                             &EdgeTable[0], Succs.size() * Preds.size()),
+          ConstantArray::get(EdgeTableTy, V),
           "__llvm_gcda_edge_table");
   EdgeTableGV->setUnnamedAddr(true);
   return EdgeTableGV;
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 182a43d..1e5e3f6 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -376,7 +376,7 @@ namespace llvm {
   public:
     static const StructType *get(LLVMContext& C) {
       return( StructType::get(
-                C, TypeBuilder<types::i<32>, xcompile>::get(C), // type
+                TypeBuilder<types::i<32>, xcompile>::get(C), // type
                 TypeBuilder<types::i<32>, xcompile>::get(C), // array size
                 TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
                 NULL));
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 7435bc3..4224ee3 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -164,7 +164,8 @@ void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
   GlobalVariable *GlobalDtors = new GlobalVariable(
       *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
       GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
-  dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false));
+                                    
+  dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
   GlobalDtors->setInitializer(ConstantArray::get(
       cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
 }
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0e1e6f3..759e681 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1190,8 +1190,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
     // escaping uses to any values that are operands to these PHIs.
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
       PHINode *P = NewPHIs[i];
-      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
-        AA->addEscapingUse(P->getOperandUse(2*ii));
+      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
+        unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+        AA->addEscapingUse(P->getOperandUse(jj));
+      }
     }
   }
 
@@ -2149,8 +2151,11 @@ bool GVN::performPRE(Function &F) {
         // Because we have added a PHI-use of the pointer value, it has now
         // "escaped" from alias analysis' perspective.  We need to inform
         // AA of this.
-        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
-          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+             ++ii) {
+          unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+        }
         
         if (MD)
           MD->invalidateCachedPointerInfo(Phi);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 04ee7c8..1d79339 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -62,14 +62,15 @@
 #include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
-STATISTIC(NumRemoved , "Number of aux indvars removed");
-STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted, "Number of canonical indvars added");
-STATISTIC(NumReplaced, "Number of exit values replaced");
-STATISTIC(NumLFTR    , "Number of loop exit tests replaced");
-STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+STATISTIC(NumRemoved     , "Number of aux indvars removed");
+STATISTIC(NumWidened     , "Number of indvars widened");
+STATISTIC(NumInserted    , "Number of canonical indvars added");
+STATISTIC(NumReplaced    , "Number of exit values replaced");
+STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem     , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp     , "Number of IV comparisons eliminated");
 
 // DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
 // and referenced here.
@@ -84,12 +85,22 @@ namespace {
     ScalarEvolution *SE;
     DominatorTree   *DT;
     TargetData      *TD;
+
+    PHINode         *CurrIV; // Current IV being simplified.
+
+     // Instructions processed by SimplifyIVUsers for CurrIV.
+    SmallPtrSet<Instruction*,16> Simplified;
+
+    // Use-def pairs if IVUsers waiting to be processed for CurrIV.
+    SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
     SmallVector<WeakVH, 16> DeadInsts;
     bool Changed;
   public:
 
     static char ID; // Pass identification, replacement for typeid
-    IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) {
+    IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+                       CurrIV(0), Changed(false) {
       initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
     }
 
@@ -105,7 +116,8 @@ namespace {
       AU.addPreserved<ScalarEvolution>();
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreservedID(LCSSAID);
-      AU.addPreserved<IVUsers>();
+      if (!DisableIVRewrite)
+        AU.addPreserved<IVUsers>();
       AU.setPreservesCFG();
     }
 
@@ -113,11 +125,15 @@ namespace {
     bool isValidRewrite(Value *FromVal, Value *ToVal);
 
     void SimplifyIVUsers(SCEVExpander &Rewriter);
+    void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+    bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
     void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
     void EliminateIVRemainder(BinaryOperator *Rem,
                               Value *IVOperand,
-                              bool IsSigned,
-                              PHINode *IVPhi);
+                              bool IsSigned);
+    void pushIVUsers(Instruction *Def);
+    bool isSimpleIVUser(Instruction *I, const Loop *L);
     void RewriteNonIntegerIVs(Loop *L);
 
     ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
@@ -483,6 +499,36 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
     SE->forgetLoop(L);
 }
 
+/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
+/// loop. IVUsers is treated as a worklist. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyIVUsersNoRewrite.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+  // Each round of simplification involves a round of eliminating operations
+  // followed by a round of widening IVs. A single IVUsers worklist is used
+  // across all rounds. The inner loop advances the user. If widening exposes
+  // more uses, then another pass through the outer loop is triggered.
+  for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+    Instruction *UseInst = I->getUser();
+    Value *IVOperand = I->getOperandValToReplace();
+
+    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+      EliminateIVComparison(ICmp, IVOperand);
+      continue;
+    }
+    if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+      bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+      if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+        EliminateIVRemainder(Rem, IVOperand, IsSigned);
+        continue;
+      }
+    }
+  }
+}
+
 namespace {
   // Collect information about induction variables that are used by sign/zero
   // extend operations. This information is recorded by CollectExtend and
@@ -493,33 +539,30 @@ namespace {
 
     WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
   };
-  typedef std::map<PHINode *, WideIVInfo> WideIVMap;
 }
 
 /// CollectExtend - Update information about the induction variable that is
 /// extended by this sign or zero extend operation. This is used to determine
 /// the final width of the IV before actually widening it.
-static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned,
-                          WideIVMap &IVMap, ScalarEvolution *SE,
-                          const TargetData *TD) {
+static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
+                          ScalarEvolution *SE, const TargetData *TD) {
   const Type *Ty = Cast->getType();
   uint64_t Width = SE->getTypeSizeInBits(Ty);
   if (TD && !TD->isLegalInteger(Width))
     return;
 
-  WideIVInfo &IVInfo = IVMap[Phi];
-  if (!IVInfo.WidestNativeType) {
-    IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
-    IVInfo.IsSigned = IsSigned;
+  if (!WI.WidestNativeType) {
+    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+    WI.IsSigned = IsSigned;
     return;
   }
 
   // We extend the IV to satisfy the sign of its first user, arbitrarily.
-  if (IVInfo.IsSigned != IsSigned)
+  if (WI.IsSigned != IsSigned)
     return;
 
-  if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType))
-    IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
+    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
 }
 
 namespace {
@@ -529,43 +572,44 @@ namespace {
 /// inserting truncs whenever we stop propagating the type.
 ///
 class WidenIV {
+  // Parameters
   PHINode *OrigPhi;
   const Type *WideType;
   bool IsSigned;
 
-  IVUsers *IU;
-  LoopInfo *LI;
-  Loop *L;
+  // Context
+  LoopInfo        *LI;
+  Loop            *L;
   ScalarEvolution *SE;
-  DominatorTree *DT;
-  SmallVectorImpl<WeakVH> &DeadInsts;
+  DominatorTree   *DT;
 
+  // Result
   PHINode *WidePhi;
   Instruction *WideInc;
   const SCEV *WideIncExpr;
+  SmallVectorImpl<WeakVH> &DeadInsts;
 
-  SmallPtrSet<Instruction*,16> Processed;
+  SmallPtrSet<Instruction*,16> Widened;
 
 public:
-  WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers,
-          LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree,
+  WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+          ScalarEvolution *SEv, DominatorTree *DTree,
           SmallVectorImpl<WeakVH> &DI) :
     OrigPhi(PN),
-    WideType(IVInfo.WidestNativeType),
-    IsSigned(IVInfo.IsSigned),
-    IU(IUsers),
+    WideType(WI.WidestNativeType),
+    IsSigned(WI.IsSigned),
     LI(LInfo),
     L(LI->getLoopFor(OrigPhi->getParent())),
     SE(SEv),
     DT(DTree),
-    DeadInsts(DI),
     WidePhi(0),
     WideInc(0),
-    WideIncExpr(0) {
+    WideIncExpr(0),
+    DeadInsts(DI) {
     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
   }
 
-  bool CreateWideIV(SCEVExpander &Rewriter);
+  PHINode *CreateWideIV(SCEVExpander &Rewriter);
 
 protected:
   Instruction *CloneIVUser(Instruction *NarrowUse,
@@ -580,52 +624,6 @@ protected:
 };
 } // anonymous namespace
 
-/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
-/// loop. IVUsers is treated as a worklist. Each successive simplification may
-/// push more users which may themselves be candidates for simplification.
-///
-void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
-  WideIVMap IVMap;
-
-  // Each round of simplification involves a round of eliminating operations
-  // followed by a round of widening IVs. A single IVUsers worklist is used
-  // across all rounds. The inner loop advances the user. If widening exposes
-  // more uses, then another pass through the outer loop is triggered.
-  for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) {
-    for(; I != E; ++I) {
-      Instruction *UseInst = I->getUser();
-      Value *IVOperand = I->getOperandValToReplace();
-
-      if (DisableIVRewrite) {
-        if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
-          bool IsSigned = Cast->getOpcode() == Instruction::SExt;
-          if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
-            CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD);
-            continue;
-          }
-        }
-      }
-      if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
-        EliminateIVComparison(ICmp, IVOperand);
-        continue;
-      }
-      if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
-        bool IsSigned = Rem->getOpcode() == Instruction::SRem;
-        if (IsSigned || Rem->getOpcode() == Instruction::URem) {
-          EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi());
-          continue;
-        }
-      }
-    }
-    for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end();
-         I != E; ++I) {
-      WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts);
-      if (Widener.CreateWideIV(Rewriter))
-        Changed = true;
-    }
-  }
-}
-
 static Value *getExtend( Value *NarrowOper, const Type *WideType,
                                bool IsSigned, IRBuilder<> &Builder) {
   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
@@ -744,7 +742,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
     return 0;
 
   // Handle data flow merges and bizarre phi cycles.
-  if (!Processed.insert(NarrowUse))
+  if (!Widened.insert(NarrowUse))
     return 0;
 
   // Our raison d'etre! Eliminate sign and zero extension.
@@ -775,9 +773,11 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
       NarrowUse->replaceAllUsesWith(NewDef);
       DeadInsts.push_back(NarrowUse);
     }
-    // Now that the extend is gone, expose it's uses to IVUsers for potential
-    // further simplification within SimplifyIVUsers.
-    IU->AddUsersIfInteresting(WideDef, WidePhi);
+    // Now that the extend is gone, we want to expose it's uses for potential
+    // further simplification. We don't need to directly inform SimplifyIVUsers
+    // of the new users, because their parent IV will be processed later as a
+    // new loop phi. If we preserved IVUsers analysis, we would also want to
+    // push the uses of WideDef here.
 
     // No further widening is needed. The deceased [sz]ext had done it for us.
     return 0;
@@ -807,7 +807,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
   // outside the loop without overflow. This suggests that the wide use
   // evaluates to the same expression as the extended narrow use, but doesn't
   // absolutely guarantee it. Hence the following failsafe check. In rare cases
-  // where it fails, we simple throw away the newly created wide use.
+  // where it fails, we simply throw away the newly created wide use.
   if (WideAddRec != SE->getSCEV(WideUse)) {
     DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
           << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
@@ -822,18 +822,18 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
 /// CreateWideIV - Process a single induction variable. First use the
 /// SCEVExpander to create a wide induction variable that evaluates to the same
 /// recurrence as the original narrow IV. Then use a worklist to forward
-/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
 /// interesting IV users, the narrow IV will be isolated for removal by
 /// DeleteDeadPHIs.
 ///
 /// It would be simpler to delete uses as they are processed, but we must avoid
 /// invalidating SCEV expressions.
 ///
-bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
   // Is this phi an induction variable?
   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
   if (!AddRec)
-    return false;
+    return NULL;
 
   // Widen the induction variable expression.
   const SCEV *WideIVExpr = IsSigned ?
@@ -846,9 +846,9 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
   // Can the IV be extended outside the loop without overflow?
   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
   if (!AddRec || AddRec->getLoop() != L)
-    return false;
+    return NULL;
 
-  // An AddRec must have loop-invariant operands. Since this AddRec it
+  // An AddRec must have loop-invariant operands. Since this AddRec is
   // materialized by a loop header phi, the expression cannot have any post-loop
   // operands, so they must dominate the loop header.
   assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
@@ -876,7 +876,7 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
   ++NumWidened;
 
   // Traverse the def-use chain using a worklist starting at the original IV.
-  assert(Processed.empty() && "expect initial state" );
+  assert(Widened.empty() && "expect initial state" );
 
   // Each worklist entry has a Narrow def-use link and Wide def.
   SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
@@ -906,7 +906,7 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
     if (NarrowDef->use_empty())
       DeadInsts.push_back(NarrowDef);
   }
-  return true;
+  return WidePhi;
 }
 
 void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
@@ -945,8 +945,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
 
 void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
                                           Value *IVOperand,
-                                          bool IsSigned,
-                                          PHINode *IVPhi) {
+                                          bool IsSigned) {
   // We're only interested in the case where we know something about
   // the numerator.
   if (IVOperand != Rem->getOperand(0))
@@ -989,15 +988,144 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
   }
 
   // Inform IVUsers about the new users.
-  if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
-    IU->AddUsersIfInteresting(I, IVPhi);
-
+  if (IU) {
+    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+      IU->AddUsersIfInteresting(I);
+  }
   DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
   ++NumElimRem;
   Changed = true;
   DeadInsts.push_back(Rem);
 }
 
+/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
+                                     Instruction *IVOperand) {
+  if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+    EliminateIVComparison(ICmp, IVOperand);
+    return true;
+  }
+  if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+    bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+    if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+      EliminateIVRemainder(Rem, IVOperand, IsSigned);
+      return true;
+    }
+  }
+
+  // Eliminate any operation that SCEV can prove is an identity function.
+  if (!SE->isSCEVable(UseInst->getType()) ||
+      (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+    return false;
+
+  UseInst->replaceAllUsesWith(IVOperand);
+
+  DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+  ++NumElimIdentity;
+  Changed = true;
+  DeadInsts.push_back(UseInst);
+  return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+void IndVarSimplify::pushIVUsers(Instruction *Def) {
+
+  for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+       UI != E; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+
+    // Avoid infinite or exponential worklist processing.
+    // Also ensure unique worklist users.
+    if (Simplified.insert(User))
+      SimpleIVUsers.push_back(std::make_pair(User, Def));
+  }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInsteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+bool IndVarSimplify::isSimpleIVUser(Instruction *I, const Loop *L) {
+  if (!SE->isSCEVable(I->getType()))
+    return false;
+
+  // Get the symbolic expression for this instruction.
+  const SCEV *S = SE->getSCEV(I);
+
+  // Only consider affine recurrences.
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+  if (AR && AR->getLoop() == L)
+    return true;
+
+  return false;
+}
+
+/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
+/// of IV users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
+/// simplifies instructions in-place during analysis. Rather than rewriting
+/// induction variables bottom-up from their users, it transforms a chain of
+/// IVUsers top-down, updating the IR only when it encouters a clear
+/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still
+/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
+/// extend elimination.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
+  // Simplification is performed independently for each IV, as represented by a
+  // loop header phi. Each round of simplification first iterates through the
+  // SimplifyIVUsers worklist, then determines whether the current IV should be
+  // widened. Widening adds a new phi to LoopPhis, inducing another round of
+  // simplification on the wide IV.
+  SmallVector<PHINode*, 8> LoopPhis;
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    LoopPhis.push_back(cast<PHINode>(I));
+  }
+  while (!LoopPhis.empty()) {
+    CurrIV = LoopPhis.pop_back_val();
+    Simplified.clear();
+    assert(SimpleIVUsers.empty() && "expect empty IV users list");
+
+    WideIVInfo WI;
+
+    pushIVUsers(CurrIV);
+
+    while (!SimpleIVUsers.empty()) {
+      Instruction *UseInst, *Operand;
+      tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+
+      if (EliminateIVUser(UseInst, Operand)) {
+        pushIVUsers(Operand);
+        continue;
+      }
+      if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+        bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+        if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+          CollectExtend(Cast, IsSigned, WI, SE, TD);
+        }
+        continue;
+      }
+      if (isSimpleIVUser(UseInst, L)) {
+        pushIVUsers(UseInst);
+      }
+    }
+    if (WI.WidestNativeType) {
+      WidenIV Widener(CurrIV, WI, LI, SE, DT, DeadInsts);
+      if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+        Changed = true;
+        LoopPhis.push_back(WidePhi);
+      }
+    }
+  }
+}
+
 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // If LoopSimplify form is not available, stay out of trouble. Some notes:
   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -1010,12 +1138,15 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   if (!L->isLoopSimplifyForm())
     return false;
 
-  IU = &getAnalysis<IVUsers>();
+  if (!DisableIVRewrite)
+    IU = &getAnalysis<IVUsers>();
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
   DT = &getAnalysis<DominatorTree>();
   TD = getAnalysisIfAvailable<TargetData>();
 
+  CurrIV = NULL;
+  Simplified.clear();
   DeadInsts.clear();
   Changed = false;
 
@@ -1040,7 +1171,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
     RewriteLoopExitValues(L, Rewriter);
 
   // Eliminate redundant IV users.
-  SimplifyIVUsers(Rewriter);
+  if (DisableIVRewrite)
+    SimplifyIVUsersNoRewrite(L, Rewriter);
+  else
+    SimplifyIVUsers(Rewriter);
 
   // Compute the type of the largest recurrence expression, and decide whether
   // a canonical induction variable should be inserted.
@@ -1146,9 +1280,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   // For completeness, inform IVUsers of the IV use in the newly-created
   // loop exit test instruction.
-  if (NewICmp)
-    IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)),
-                              IndVar);
+  if (NewICmp && IU)
+    IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
 
   // Clean up dead instructions.
   Changed |= DeleteDeadPHIs(L->getHeader());
@@ -1267,6 +1400,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
     // Patch the new value into place.
     if (Op->hasName())
       NewVal->takeName(Op);
+    if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
+      NewValI->setDebugLoc(User->getDebugLoc());
     User->replaceUsesOfWith(Op, NewVal);
     UI->setOperandValToReplace(NewVal);
 
@@ -1579,5 +1714,6 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
   }
 
   // Add a new IVUsers entry for the newly-created integer PHI.
-  IU->AddUsersIfInteresting(NewPHI, NewPHI);
+  if (IU)
+    IU->AddUsersIfInteresting(NewPHI);
 }
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 753a558..f7f3298 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -190,7 +190,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   BasicBlock* exitingBlock = exitingBlocks[0];
   BasicBlock::iterator BI = exitBlock->begin();
   while (PHINode* P = dyn_cast<PHINode>(BI)) {
-    P->replaceUsesOfWith(exitingBlock, preheader);
+    int j = P->getBasicBlockIndex(exitingBlock);
+    assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
+    P->setIncomingBlock(j, preheader);
     for (unsigned i = 1; i < exitingBlocks.size(); ++i)
       P->removeIncomingValue(exitingBlocks[i]);
     ++BI;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 47dced3..9fd0958 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -220,7 +220,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
   // For PHI nodes, the value available in OldPreHeader is just the
   // incoming value from OldPreHeader.
   for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
-    ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
 
   // For the rest of the instructions, either hoist to the OrigPreheader if
   // possible or create a clone in the OldPreHeader if not.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 73ebd61..afa0bf8 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1804,8 +1804,7 @@ LSRInstance::OptimizeLoopTermCond() {
         ExitingBlock->getInstList().insert(TermBr, Cond);
 
         // Clone the IVUse, as the old use still exists!
-        CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(),
-                              CondUse->getPhi());
+        CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
         TermBr->replaceUsesOfWith(OldCond, Cond);
       }
     }
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index e05f29c..840c4b6 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1021,6 +1021,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
         while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
           ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
         
+        // If Succ has any successors with PHI nodes, update them to have
+        // entries coming from Pred instead of Succ.
+        Succ->replaceAllUsesWith(Pred);
+        
         // Move all of the successor contents from Succ to Pred.
         Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
                                    Succ->end());
@@ -1028,10 +1032,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
         BI->eraseFromParent();
         RemoveFromWorklist(BI, Worklist);
         
-        // If Succ has any successors with PHI nodes, update them to have
-        // entries coming from Pred instead of Succ.
-        Succ->replaceAllUsesWith(Pred);
-        
         // Remove Succ from the loop tree.
         LI->removeBlock(Succ);
         LPM->deleteSimpleAnalysisValue(Succ, L);
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 6cd35e5..89a451e 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Intrinsics.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CallSite.h"
@@ -564,6 +565,29 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
   return 0;
 }
 
+/// ModuleHasARC - Test if the given module looks interesting to run ARC
+/// optimization on.
+static bool ModuleHasARC(const Module &M) {
+  return
+    M.getNamedValue("objc_retain") ||
+    M.getNamedValue("objc_release") ||
+    M.getNamedValue("objc_autorelease") ||
+    M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+    M.getNamedValue("objc_retainBlock") ||
+    M.getNamedValue("objc_autoreleaseReturnValue") ||
+    M.getNamedValue("objc_autoreleasePoolPush") ||
+    M.getNamedValue("objc_loadWeakRetained") ||
+    M.getNamedValue("objc_loadWeak") ||
+    M.getNamedValue("objc_destroyWeak") ||
+    M.getNamedValue("objc_storeWeak") ||
+    M.getNamedValue("objc_initWeak") ||
+    M.getNamedValue("objc_moveWeak") ||
+    M.getNamedValue("objc_copyWeak") ||
+    M.getNamedValue("objc_retainedObject") ||
+    M.getNamedValue("objc_unretainedObject") ||
+    M.getNamedValue("objc_unretainedPointer");
+}
+
 //===----------------------------------------------------------------------===//
 // ARC AliasAnalysis.
 //===----------------------------------------------------------------------===//
@@ -749,8 +773,12 @@ namespace {
   /// ObjCARCExpand - Early ARC transformations.
   class ObjCARCExpand : public FunctionPass {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool doInitialization(Module &M);
     virtual bool runOnFunction(Function &F);
 
+    /// Run - A flag indicating whether this optimization pass should run.
+    bool Run;
+
   public:
     static char ID;
     ObjCARCExpand() : FunctionPass(ID) {
@@ -771,10 +799,19 @@ void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
 }
 
+bool ObjCARCExpand::doInitialization(Module &M) {
+  Run = ModuleHasARC(M);
+  return false;
+}
+
 bool ObjCARCExpand::runOnFunction(Function &F) {
   if (!EnableARCOpts)
     return false;
 
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
   bool Changed = false;
 
   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
@@ -840,8 +877,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
 // usually can't sink them past other calls, which would be the main
 // case where it would be useful.
 
+/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
 #include "llvm/GlobalAlias.h"
-#include "llvm/Module.h"
 #include "llvm/Constants.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1365,10 +1403,12 @@ namespace {
     bool Changed;
     ProvenanceAnalysis PA;
 
+    /// Run - A flag indicating whether this optimization pass should run.
+    bool Run;
+
     /// RetainFunc, RelaseFunc - Declarations for objc_retain,
     /// objc_retainBlock, and objc_release.
-    Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc,
-             *AutoreleaseFunc;
+    Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
 
     /// RetainRVCallee, etc. - Declarations for ObjC runtime
     /// functions, for use in creating calls to them. These are initialized
@@ -3069,6 +3109,10 @@ bool ObjCARCOpt::doInitialization(Module &M) {
   if (!EnableARCOpts)
     return false;
 
+  Run = ModuleHasARC(M);
+  if (!Run)
+    return false;
+
   // Identify the imprecise release metadata kind.
   ImpreciseReleaseMDKind =
     M.getContext().getMDKindID("clang.imprecise_release");
@@ -3078,7 +3122,6 @@ bool ObjCARCOpt::doInitialization(Module &M) {
   RetainBlockFunc = M.getFunction("objc_retainBlock");
   RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
   ReleaseFunc = M.getFunction("objc_release");
-  AutoreleaseFunc = M.getFunction("objc_autorelease");
 
   // Intuitively, objc_retain and others are nocapture, however in practice
   // they are not, because they return their argument value. And objc_release
@@ -3098,6 +3141,10 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
   if (!EnableARCOpts)
     return false;
 
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
   Changed = false;
 
   PA.setAA(&getAnalysis<AliasAnalysis>());
@@ -3162,6 +3209,9 @@ namespace {
     DominatorTree *DT;
     ProvenanceAnalysis PA;
 
+    /// Run - A flag indicating whether this optimization pass should run.
+    bool Run;
+
     /// StoreStrongCallee, etc. - Declarations for ObjC runtime
     /// functions, for use in creating calls to them. These are initialized
     /// lazily to avoid cluttering up the Module with unused declarations.
@@ -3384,6 +3434,10 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
 }
 
 bool ObjCARCContract::doInitialization(Module &M) {
+  Run = ModuleHasARC(M);
+  if (!Run)
+    return false;
+
   // These are initialized lazily.
   StoreStrongCallee = 0;
   RetainAutoreleaseCallee = 0;
@@ -3407,6 +3461,10 @@ bool ObjCARCContract::runOnFunction(Function &F) {
   if (!EnableARCOpts)
     return false;
 
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
   Changed = false;
   AA = &getAnalysis<AliasAnalysis>();
   DT = &getAnalysis<DominatorTree>();
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index beef127..46ac948 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -293,6 +293,11 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
   if (ScalarKind == Unknown)
     ScalarKind = Integer;
 
+  // FIXME: It should be possible to promote the vector type up to the alloca's
+  // size.
+  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
+    ScalarKind = Integer;
+
   // If we were able to find a vector type that can handle this with
   // insert/extract elements, and if there was at least one use that had
   // a vector type, promote this to a vector.  We don't want to promote
@@ -384,7 +389,6 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
   // Otherwise, we have a case that we can't handle with an optimized vector
   // form.  We can still turn this into a large integer.
   ScalarKind = Integer;
-  VectorTy = 0;
 }
 
 /// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
@@ -522,10 +526,22 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
     // If this is a constant sized memset of a constant value (e.g. 0) we can
     // handle it.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
-      // Store of constant value and constant size.
-      if (!isa<ConstantInt>(MSI->getValue()) ||
-          !isa<ConstantInt>(MSI->getLength()))
+      // Store of constant value.
+      if (!isa<ConstantInt>(MSI->getValue()))
+        return false;
+
+      // Store of constant size.
+      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
+      if (!Len)
         return false;
+
+      // If the size differs from the alloca, we can only convert the alloca to
+      // an integer bag-of-bits.
+      // FIXME: This should handle all of the cases that are currently accepted
+      // as vector element insertions.
+      if (Len->getZExtValue() != AllocaSize || Offset != 0)
+        ScalarKind = Integer;
+
       IsNotTrivial = true;  // Can't be mem2reg'd.
       HadNonMemTransferAccess = true;
       continue;
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 92464e8..b4f74f9 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -153,13 +153,13 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   // Delete the unconditional branch from the predecessor...
   PredBB->getInstList().pop_back();
   
-  // Move all definitions in the successor to the predecessor...
-  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
-  
   // Make all PHI nodes that referred to BB now refer to Pred as their
   // source...
   BB->replaceAllUsesWith(PredBB);
   
+  // Move all definitions in the successor to the predecessor...
+  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+  
   // Inherit predecessors name if it exists.
   if (!PredBB->hasName())
     PredBB->takeName(BB);
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index d6206a3..92ce500 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -193,44 +193,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
   
   // If there are any PHI nodes in DestBB, we need to update them so that they
   // merge incoming values from NewBB instead of from TIBB.
-  if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
-    // This conceptually does:
-    //  foreach (PHINode *PN in DestBB)
-    //    PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
-    // but is optimized for two cases.
-    
-    if (APHI->getNumIncomingValues() <= 8) {  // Small # preds case.
-      unsigned BBIdx = 0;
-      for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
-        // We no longer enter through TIBB, now we come in through NewBB.
-        // Revector exactly one entry in the PHI node that used to come from
-        // TIBB to come from NewBB.
-        PHINode *PN = cast<PHINode>(I);
-        
-        // Reuse the previous value of BBIdx if it lines up.  In cases where we
-        // have multiple phi nodes with *lots* of predecessors, this is a speed
-        // win because we don't have to scan the PHI looking for TIBB.  This
-        // happens because the BB list of PHI nodes are usually in the same
-        // order.
-        if (PN->getIncomingBlock(BBIdx) != TIBB)
-          BBIdx = PN->getBasicBlockIndex(TIBB);
-        PN->setIncomingBlock(BBIdx, NewBB);
-      }
-    } else {
-      // However, the foreach loop is slow for blocks with lots of predecessors
-      // because PHINode::getIncomingBlock is O(n) in # preds.  Instead, walk
-      // the user list of TIBB to find the PHI nodes.
-      SmallPtrSet<PHINode*, 16> UpdatedPHIs;
-    
-      for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
-           UI != E; ) {
-        Value::use_iterator Use = UI++;
-        if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
-          // Remove one entry from each PHI.
-          if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
-            PN->setOperand(Use.getOperandNo(), NewBB);
-        }
-      }
+  {
+    unsigned BBIdx = 0;
+    for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+      // We no longer enter through TIBB, now we come in through NewBB.
+      // Revector exactly one entry in the PHI node that used to come from
+      // TIBB to come from NewBB.
+      PHINode *PN = cast<PHINode>(I);
+
+      // Reuse the previous value of BBIdx if it lines up.  In cases where we
+      // have multiple phi nodes with *lots* of predecessors, this is a speed
+      // win because we don't have to scan the PHI looking for TIBB.  This
+      // happens because the BB list of PHI nodes are usually in the same
+      // order.
+      if (PN->getIncomingBlock(BBIdx) != TIBB)
+	BBIdx = PN->getBasicBlockIndex(TIBB);
+      PN->setIncomingBlock(BBIdx, NewBB);
     }
   }
    
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d967ceb..561b69d 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -572,12 +572,12 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     // removed, so we just need to splice the blocks.
     BI->eraseFromParent();
     
-    // Move all the instructions in the succ to the pred.
-    I->getInstList().splice(I->end(), Dest->getInstList());
-    
     // Make all PHI nodes that referred to Dest now refer to I as their source.
     Dest->replaceAllUsesWith(I);
 
+    // Move all the instructions in the succ to the pred.
+    I->getInstList().splice(I->end(), Dest->getInstList());
+    
     // Remove the dest block.
     Dest->eraseFromParent();
     
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 946e62f..18ecd61 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1097,15 +1097,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
         TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
     }
 
+    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+    BasicBlock *ReturnBB = Returns[0]->getParent();
+    ReturnBB->replaceAllUsesWith(AfterCallBB);
+
     // Splice the code from the return block into the block that it will return
     // to, which contains the code that was after the call.
-    BasicBlock *ReturnBB = Returns[0]->getParent();
     AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                       ReturnBB->getInstList());
 
-    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
-    ReturnBB->replaceAllUsesWith(AfterCallBB);
-
     // Delete the return instruction now and empty ReturnBB now.
     Returns[0]->eraseFromParent();
     ReturnBB->eraseFromParent();
@@ -1125,8 +1125,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
 
   // Splice the code entry block into calling block, right before the
   // unconditional branch.
-  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
   CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
+  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
 
   // Remove the unconditional branch.
   OrigBB->getInstList().erase(Br);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 19c3c72..506e5e8 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -427,10 +427,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   BasicBlock *PredBB = DestBB->getSinglePredecessor();
   assert(PredBB && "Block doesn't have a single predecessor!");
   
-  // Splice all the instructions from PredBB to DestBB.
-  PredBB->getTerminator()->eraseFromParent();
-  DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
-
   // Zap anything that took the address of DestBB.  Not doing this will give the
   // address an invalid value.
   if (DestBB->hasAddressTaken()) {
@@ -445,6 +441,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   // Anything that branched to PredBB now branches to DestBB.
   PredBB->replaceAllUsesWith(DestBB);
   
+  // Splice all the instructions from PredBB to DestBB.
+  PredBB->getTerminator()->eraseFromParent();
+  DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
   if (P) {
     DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
     if (DT) {
@@ -660,12 +660,17 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
     // them, which helps expose duplicates, but we have to check all the
     // operands to be safe in case instcombine hasn't run.
     uintptr_t Hash = 0;
+    // This hash algorithm is quite weak as hash functions go, but it seems
+    // to do a good enough job for this particular purpose, and is very quick.
     for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
-      // This hash algorithm is quite weak as hash functions go, but it seems
-      // to do a good enough job for this particular purpose, and is very quick.
       Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
       Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
     }
+    for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+         I != E; ++I) {
+      Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+    }
     // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
     Hash >>= 1;
     // If we've never seen this hash value before, it's a unique PHI.
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index f02ffd2..e79fb5a 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -375,6 +375,7 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
     SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
                            ".preheader", this);
 
+  NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
   DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
                << "\n");
 
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 7da7271..6772511 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -47,6 +47,14 @@ static inline void RemapInstruction(Instruction *I,
     if (It != VMap.end())
       I->setOperand(op, It->second);
   }
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+      if (It != VMap.end())
+        PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+    }
+  }
 }
 
 /// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
@@ -75,13 +83,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
   // Delete the unconditional branch from the predecessor...
   OnlyPred->getInstList().pop_back();
 
-  // Move all definitions in the successor to the predecessor...
-  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
   // Make all PHI nodes that referred to BB now refer to Pred as their
   // source...
   BB->replaceAllUsesWith(OnlyPred);
 
+  // Move all definitions in the successor to the predecessor...
+  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
   std::string OldName = BB->getName();
 
   // Erase basic block from the function...
@@ -247,16 +255,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
       // the successor of the latch block.  The successor of the exit block will
       // be updated specially after unrolling all the way.
       if (*BB != LatchBlock)
-        for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
-             UI != UE;) {
-          Instruction *UseInst = cast<Instruction>(*UI);
-          ++UI;
-          if (isa<PHINode>(UseInst) && !L->contains(UseInst)) {
-            PHINode *phi = cast<PHINode>(UseInst);
-            Value *Incoming = phi->getIncomingValueForBlock(*BB);
-            phi->addIncoming(Incoming, New);
-          }
-        }
+        for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
+             ++SI)
+          if (!L->contains(*SI))
+            for (BasicBlock::iterator BBI = (*SI)->begin();
+                 PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+              Value *Incoming = phi->getIncomingValueForBlock(*BB);
+              phi->addIncoming(Incoming, New);
+            }
 
       // Keep track of new headers and latches as we create them, so that
       // we can insert the proper branches later.
@@ -288,24 +294,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
   // successor blocks, update them to use the appropriate values computed as the
   // last iteration of the loop.
   if (Count != 1) {
-    SmallPtrSet<PHINode*, 8> Users;
-    for (Value::use_iterator UI = LatchBlock->use_begin(),
-         UE = LatchBlock->use_end(); UI != UE; ++UI)
-      if (PHINode *phi = dyn_cast<PHINode>(*UI))
-        Users.insert(phi);
-    
     BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
-    for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
+    for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
          SI != SE; ++SI) {
-      PHINode *PN = *SI;
-      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
-      // If this value was defined in the loop, take the value defined by the
-      // last iteration of the loop.
-      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
-        if (L->contains(InValI))
-          InVal = LastValueMap[InVal];
+      for (BasicBlock::iterator BBI = (*SI)->begin();
+           PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+        Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+        // If this value was defined in the loop, take the value defined by the
+        // last iteration of the loop.
+        if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+          if (L->contains(InValI))
+            InVal = LastValueMap[InVal];
+        }
+        PN->addIncoming(InVal, LastIterationBB);
       }
-      PN->addIncoming(InVal, LastIterationBB);
     }
   }
 
@@ -352,11 +354,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
       // Replace the conditional branch with an unconditional one.
       BranchInst::Create(Dest, Term);
       Term->eraseFromParent();
-      // Merge adjacent basic blocks, if possible.
-      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
+    }
+  }
+
+  // Merge adjacent basic blocks, if possible.
+  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+    if (Term->isUnconditional()) {
+      BasicBlock *Dest = Term->getSuccessor(0);
+      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
         std::replace(Latches.begin(), Latches.end(), Dest, Fold);
-        std::replace(Headers.begin(), Headers.end(), Dest, Fold);
-      }
     }
   }
   
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a1736b9..32d1dcc 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -73,6 +73,22 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
 };
 }
 
+/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+/// are lifetime markers.
+///
+static bool onlyUsedByLifetimeMarkers(const Value *V) {
+  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+       UI != UE; ++UI) {
+    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+    if (!II) return false;
+
+    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+        II->getIntrinsicID() != Intrinsic::lifetime_end)
+      return false;
+  }
+  return true;
+}
+
 /// isAllocaPromotable - Return true if this alloca is legal for promotion.
 /// This is true if there are only loads and stores to the alloca.
 ///
@@ -92,6 +108,22 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
         return false;   // Don't allow a store OF the AI, only INTO the AI.
       if (SI->isVolatile())
         return false;
+    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+          II->getIntrinsicID() != Intrinsic::lifetime_end)
+        return false;
+    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+        return false;
+      if (!onlyUsedByLifetimeMarkers(BCI))
+        return false;
+    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+        return false;
+      if (!GEPI->hasAllZeroIndices())
+        return false;
+      if (!onlyUsedByLifetimeMarkers(GEPI))
+        return false;
     } else {
       return false;
     }
@@ -335,6 +367,31 @@ namespace {
   };
 }  // end of anonymous namespace
 
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+  // Knowing that this alloca is promotable, we know that it's safe to kill all
+  // instructions except for load and store.
+
+  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+       UI != UE;) {
+    Instruction *I = cast<Instruction>(*UI);
+    ++UI;
+    if (isa<LoadInst>(I) || isa<StoreInst>(I))
+      continue;
+
+    if (!I->getType()->isVoidTy()) {
+      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+      // Follow the use/def chain to erase them now instead of leaving it for
+      // dead code elimination later.
+      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+           UI != UE;) {
+        Instruction *Inst = cast<Instruction>(*UI);
+        ++UI;
+        Inst->eraseFromParent();
+      }
+    }
+    I->eraseFromParent();
+  }
+}
 
 void PromoteMem2Reg::run() {
   Function &F = *DT.getRoot()->getParent();
@@ -353,6 +410,8 @@ void PromoteMem2Reg::run() {
     assert(AI->getParent()->getParent() == &F &&
            "All allocas should be in the same function, which is same as DF!");
 
+    removeLifetimeIntrinsicUsers(AI);
+
     if (AI->use_empty()) {
       // If there are no uses of the alloca, just delete it now.
       if (AST) AST->deleteValue(AI);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 6df846c..7b93b4a 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2450,6 +2450,77 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
   return !DeadCases.empty();
 }
 
+/// FindPHIForConditionForwarding - If BB would be eligible for simplification
+/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+                                              BasicBlock *BB,
+                                              int *PhiIndex) {
+  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+    return NULL; // BB must be empty to be a candidate for simplification.
+  if (!BB->getSinglePredecessor())
+    return NULL; // BB must be dominated by the switch.
+
+  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+  if (!Branch || !Branch->isUnconditional())
+    return NULL; // Terminator must be unconditional branch.
+
+  BasicBlock *Succ = Branch->getSuccessor(0);
+
+  BasicBlock::iterator I = Succ->begin();
+  while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+    int Idx = PHI->getBasicBlockIndex(BB);
+    assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+    Value *InValue = PHI->getIncomingValue(Idx);
+    if (InValue != CaseValue) continue;
+
+    *PhiIndex = Idx;
+    return PHI;
+  }
+
+  return NULL;
+}
+
+/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
+/// instruction to a phi node dominated by the switch, if that would mean that
+/// some of the destination blocks of the switch can be folded away.
+/// Returns true if a change is made.
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+  typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+  ForwardingNodesMap ForwardingNodes;
+
+  for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case.
+    ConstantInt *CaseValue = SI->getCaseValue(I);
+    BasicBlock *CaseDest = SI->getSuccessor(I);
+
+    int PhiIndex;
+    PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
+                                                 &PhiIndex);
+    if (!PHI) continue;
+
+    ForwardingNodes[PHI].push_back(PhiIndex);
+  }
+
+  bool Changed = false;
+
+  for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
+       E = ForwardingNodes.end(); I != E; ++I) {
+    PHINode *Phi = I->first;
+    SmallVector<int,4> &Indexes = I->second;
+
+    if (Indexes.size() < 2) continue;
+
+    for (size_t I = 0, E = Indexes.size(); I != E; ++I)
+      Phi->setIncomingValue(Indexes[I], SI->getCondition());
+    Changed = true;
+  }
+
+  return Changed;
+}
+
 bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   // If this switch is too complex to want to look at, ignore it.
   if (!isValueEqualityComparison(SI))
@@ -2486,6 +2557,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   if (EliminateDeadSwitchCases(SI))
     return SimplifyCFG(BB) | true;
 
+  if (ForwardSwitchConditionToPHI(SI))
+    return SimplifyCFG(BB) | true;
+
   return false;
 }
 
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a73bf04..de6cbdc 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Type.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/Instructions.h"
 #include "llvm/Metadata.h"
 #include "llvm/ADT/SmallVector.h"
 using namespace llvm;
@@ -128,6 +129,19 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
              "Referenced value not in value map!");
   }
 
+  // Remap phi nodes' incoming blocks.
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
+      // If we aren't ignoring missing entries, assert that something happened.
+      if (V != 0)
+        PN->setIncomingBlock(i, cast<BasicBlock>(V));
+      else
+        assert((Flags & RF_IgnoreMissingEntries) &&
+               "Referenced block not in value map!");
+    }
+  }
+
   // Remap attached metadata.
   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
   I->getAllMetadata(MDs);
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index cfcffeb..496f500 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -32,7 +32,6 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -42,11 +41,6 @@
 #include <cctype>
 using namespace llvm;
 
-static cl::opt<bool>
-EnableDebugInfoComment("enable-debug-info-comment", cl::Hidden,
-                       cl::desc("Enable debug info comments"));
-
-
 // Make virtual table appear in this compilation unit.
 AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
 
@@ -140,30 +134,45 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) {
 // TypePrinting Class: Type printing machinery
 //===----------------------------------------------------------------------===//
 
-static DenseMap<const Type *, std::string> &getTypeNamesMap(void *M) {
-  return *static_cast<DenseMap<const Type *, std::string>*>(M);
-}
-
-void TypePrinting::clear() {
-  getTypeNamesMap(TypeNames).clear();
-}
-
-bool TypePrinting::hasTypeName(const Type *Ty) const {
-  return getTypeNamesMap(TypeNames).count(Ty);
-}
-
-void TypePrinting::addTypeName(const Type *Ty, const std::string &N) {
-  getTypeNamesMap(TypeNames).insert(std::make_pair(Ty, N));
-}
-
-
-TypePrinting::TypePrinting() {
-  TypeNames = new DenseMap<const Type *, std::string>();
-}
+/// TypePrinting - Type printing machinery.
+namespace {
+class TypePrinting {
+  DenseMap<const Type *, std::string> TypeNames;
+  TypePrinting(const TypePrinting &);   // DO NOT IMPLEMENT
+  void operator=(const TypePrinting&);  // DO NOT IMPLEMENT
+public:
+  TypePrinting() {}
+  ~TypePrinting() {}
+  
+  void clear() {
+    TypeNames.clear();
+  }
+  
+  void print(const Type *Ty, raw_ostream &OS, bool IgnoreTopLevelName = false);
+  
+  void printAtLeastOneLevel(const Type *Ty, raw_ostream &OS) {
+    print(Ty, OS, true);
+  }
+  
+  /// hasTypeName - Return true if the type has a name in TypeNames, false
+  /// otherwise.
+  bool hasTypeName(const Type *Ty) const {
+    return TypeNames.count(Ty);
+  }
 
-TypePrinting::~TypePrinting() {
-  delete &getTypeNamesMap(TypeNames);
-}
+  
+  /// addTypeName - Add a name for the specified type if it doesn't already have
+  /// one.  This name will be printed instead of the structural version of the
+  /// type in order to make the output more concise.
+  void addTypeName(const Type *Ty, const std::string &N) {
+    TypeNames.insert(std::make_pair(Ty, N));
+  }
+  
+private:
+  void CalcTypeName(const Type *Ty, SmallVectorImpl<const Type *> &TypeStack,
+                    raw_ostream &OS, bool IgnoreTopLevelName = false);
+};
+} // end anonymous namespace.
 
 /// CalcTypeName - Write the specified type to the specified raw_ostream, making
 /// use of type names or up references to shorten the type name where possible.
@@ -172,7 +181,7 @@ void TypePrinting::CalcTypeName(const Type *Ty,
                                 raw_ostream &OS, bool IgnoreTopLevelName) {
   // Check to see if the type is named.
   if (!IgnoreTopLevelName) {
-    DenseMap<const Type *, std::string> &TM = getTypeNamesMap(TypeNames);
+    DenseMap<const Type *, std::string> &TM = TypeNames;
     DenseMap<const Type *, std::string>::iterator I = TM.find(Ty);
     if (I != TM.end()) {
       OS << I->second;
@@ -283,10 +292,9 @@ void TypePrinting::CalcTypeName(const Type *Ty,
 void TypePrinting::print(const Type *Ty, raw_ostream &OS,
                          bool IgnoreTopLevelName) {
   // Check to see if the type is named.
-  DenseMap<const Type*, std::string> &TM = getTypeNamesMap(TypeNames);
   if (!IgnoreTopLevelName) {
-    DenseMap<const Type*, std::string>::iterator I = TM.find(Ty);
-    if (I != TM.end()) {
+    DenseMap<const Type*, std::string>::iterator I = TypeNames.find(Ty);
+    if (I != TypeNames.end()) {
       OS << I->second;
       return;
     }
@@ -304,7 +312,7 @@ void TypePrinting::print(const Type *Ty, raw_ostream &OS,
 
   // Cache type name for later use.
   if (!IgnoreTopLevelName)
-    TM.insert(std::make_pair(Ty, TypeOS.str()));
+    TypeNames.insert(std::make_pair(Ty, TypeOS.str()));
 }
 
 namespace {
@@ -1753,18 +1761,6 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
   if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
 }
 
-/// printDebugLoc - Print DebugLoc.
-static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) {
-  OS << DL.getLine() << ":" << DL.getCol();
-  if (MDNode *N = DL.getInlinedAt(getGlobalContext())) {
-    DebugLoc IDL = DebugLoc::getFromDILocation(N);
-    if (!IDL.isUnknown()) {
-      OS << "@";
-      printDebugLoc(IDL,OS);
-    }
-  }
-}
-
 /// printInfoComment - Print a little comment after the instruction indicating
 /// which slot it occupies.
 ///
@@ -1772,43 +1768,6 @@ void AssemblyWriter::printInfoComment(const Value &V) {
   if (AnnotationWriter) {
     AnnotationWriter->printInfoComment(V, Out);
     return;
-  } else if (EnableDebugInfoComment) {
-    bool Padded = false;
-    if (const Instruction *I = dyn_cast<Instruction>(&V)) {
-      const DebugLoc &DL = I->getDebugLoc();
-      if (!DL.isUnknown()) {
-        if (!Padded) {
-          Out.PadToColumn(50);
-          Padded = true;
-          Out << ";";
-        }
-        Out << " [debug line = ";
-        printDebugLoc(DL,Out);
-        Out << "]";
-      }
-      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
-        const MDNode *Var = DDI->getVariable();
-        if (!Padded) {
-          Out.PadToColumn(50);
-          Padded = true;
-          Out << ";";
-        }
-        if (Var && Var->getNumOperands() >= 2)
-          if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2)))
-            Out << " [debug variable = " << MDS->getString() << "]";
-      }
-      else if (const DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
-        const MDNode *Var = DVI->getVariable();
-        if (!Padded) {
-          Out.PadToColumn(50);
-          Padded = true;
-          Out << ";";
-        }
-        if (Var && Var->getNumOperands() >= 2)
-          if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2)))
-            Out << " [debug variable = " << MDS->getString() << "]";
-      }
-    }
   }
 }
 
@@ -1891,16 +1850,16 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       writeOperand(I.getOperand(i), true);
     }
     Out << ']';
-  } else if (isa<PHINode>(I)) {
+  } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
     Out << ' ';
     TypePrinter.print(I.getType(), Out);
     Out << ' ';
 
-    for (unsigned op = 0, Eop = I.getNumOperands(); op < Eop; op += 2) {
+    for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) {
       if (op) Out << ", ";
       Out << "[ ";
-      writeOperand(I.getOperand(op  ), false); Out << ", ";
-      writeOperand(I.getOperand(op+1), false); Out << " ]";
+      writeOperand(PN->getIncomingValue(op), false); Out << ", ";
+      writeOperand(PN->getIncomingBlock(op), false); Out << " ]";
     }
   } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
     Out << ' ';
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 9d4543d..cb7d2ac 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -28,266 +28,22 @@ using namespace llvm;
 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   assert(F && "Illegal to upgrade a non-existent Function.");
 
-  // Get the Function's name.
-  const std::string& Name = F->getName();
-
-  // Convenience
-  const FunctionType *FTy = F->getFunctionType();
-
   // Quickly eliminate it, if it's not a candidate.
-  if (Name.length() <= 8 || Name[0] != 'l' || Name[1] != 'l' || 
-      Name[2] != 'v' || Name[3] != 'm' || Name[4] != '.')
+  StringRef Name = F->getName();
+  if (Name.size() <= 8 || !Name.startswith("llvm."))
     return false;
+  Name = Name.substr(5); // Strip off "llvm."
 
+  const FunctionType *FTy = F->getFunctionType();
   Module *M = F->getParent();
-  switch (Name[5]) {
+  
+  switch (Name[0]) {
   default: break;
-  case 'a':
-    // This upgrades the llvm.atomic.lcs, llvm.atomic.las, llvm.atomic.lss,
-    // and atomics with default address spaces to their new names to their new
-    // function name (e.g. llvm.atomic.add.i32 => llvm.atomic.add.i32.p0i32)
-    if (Name.compare(5,7,"atomic.",7) == 0) {
-      if (Name.compare(12,3,"lcs",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.cmp.swap" + Name.substr(delim) +
-                   ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.compare(12,3,"las",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.load.add"+Name.substr(delim)
-                   + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.compare(12,3,"lss",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.load.sub"+Name.substr(delim)
-                   + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.rfind(".p") == std::string::npos) {
-        // We don't have an address space qualifier so this has be upgraded
-        // to the new name.  Copy the type name at the end of the intrinsic
-        // and add to it
-        std::string::size_type delim = Name.find_last_of('.');
-        assert(delim != std::string::npos && "can not find type");
-        F->setName(Name + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-    } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
-      if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
-            Name.compare(14, 5, "vaddl", 5) == 0 ||
-            Name.compare(14, 5, "vsubl", 5) == 0 ||
-            Name.compare(14, 5, "vaddw", 5) == 0 ||
-            Name.compare(14, 5, "vsubw", 5) == 0 ||
-            Name.compare(14, 5, "vmlal", 5) == 0 ||
-            Name.compare(14, 5, "vmlsl", 5) == 0 ||
-            Name.compare(14, 5, "vabdl", 5) == 0 ||
-            Name.compare(14, 5, "vabal", 5) == 0) &&
-           (Name.compare(19, 2, "s.", 2) == 0 ||
-            Name.compare(19, 2, "u.", 2) == 0)) ||
-
-          (Name.compare(14, 4, "vaba", 4) == 0 &&
-           (Name.compare(18, 2, "s.", 2) == 0 ||
-            Name.compare(18, 2, "u.", 2) == 0)) ||
-
-          (Name.compare(14, 6, "vmovn.", 6) == 0)) {
-
-        // Calls to these are transformed into IR without intrinsics.
-        NewFn = 0;
-        return true;
-      }
-      // Old versions of NEON ld/st intrinsics are missing alignment arguments.
-      bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
-      bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
-      if (isVLd || isVSt) {
-        unsigned NumVecs = Name.at(17) - '0';
-        if (NumVecs == 0 || NumVecs > 4)
-          return false;
-        bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
-        if (!isLaneOp && Name.at(18) != '.')
-          return false;
-        unsigned ExpectedArgs = 2; // for the address and alignment
-        if (isVSt || isLaneOp)
-          ExpectedArgs += NumVecs;
-        if (isLaneOp)
-          ExpectedArgs += 1; // for the lane number
-        unsigned NumP = FTy->getNumParams();
-        if (NumP != ExpectedArgs - 1)
-          return false;
-
-        // Change the name of the old (bad) intrinsic, because 
-        // its type is incorrect, but we cannot overload that name.
-        F->setName("");
-
-        // One argument is missing: add the alignment argument.
-        std::vector<const Type*> NewParams;
-        for (unsigned p = 0; p < NumP; ++p)
-          NewParams.push_back(FTy->getParamType(p));
-        NewParams.push_back(Type::getInt32Ty(F->getContext()));
-        FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
-                                                 NewParams, false);
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
-        return true;
-      }
-    }
-    break;
-  case 'b':
-    //  This upgrades the name of the llvm.bswap intrinsic function to only use 
-    //  a single type name for overloading. We only care about the old format
-    //  'llvm.bswap.i*.i*', so check for 'bswap.' and then for there being 
-    //  a '.' after 'bswap.'
-    if (Name.compare(5,6,"bswap.",6) == 0) {
-      std::string::size_type delim = Name.find('.',11);
-      
-      if (delim != std::string::npos) {
-        //  Construct the new name as 'llvm.bswap' + '.i*'
-        F->setName(Name.substr(0,10)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-    }
-    break;
-
-  case 'c':
-    //  We only want to fix the 'llvm.ct*' intrinsics which do not have the 
-    //  correct return type, so we check for the name, and then check if the 
-    //  return type does not match the parameter type.
-    if ( (Name.compare(5,5,"ctpop",5) == 0 ||
-          Name.compare(5,4,"ctlz",4) == 0 ||
-          Name.compare(5,4,"cttz",4) == 0) &&
-        FTy->getReturnType() != FTy->getParamType(0)) {
-      //  We first need to change the name of the old (bad) intrinsic, because 
-      //  its type is incorrect, but we cannot overload that name. We 
-      //  arbitrarily unique it here allowing us to construct a correctly named 
-      //  and typed function below.
-      F->setName("");
-
-      //  Now construct the new intrinsic with the correct name and type. We 
-      //  leave the old function around in order to query its type, whatever it 
-      //  may be, and correctly convert up to the new type.
-      NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                    FTy->getParamType(0),
-                                                    FTy->getParamType(0),
-                                                    (Type *)0));
-      return true;
-    }
-    break;
-
-  case 'e':
-    //  The old llvm.eh.selector.i32 is equivalent to the new llvm.eh.selector.
-    if (Name.compare("llvm.eh.selector.i32") == 0) {
-      F->setName("llvm.eh.selector");
-      NewFn = F;
-      return true;
-    }
-    //  The old llvm.eh.typeid.for.i32 is equivalent to llvm.eh.typeid.for.
-    if (Name.compare("llvm.eh.typeid.for.i32") == 0) {
-      F->setName("llvm.eh.typeid.for");
-      NewFn = F;
-      return true;
-    }
-    //  Convert the old llvm.eh.selector.i64 to a call to llvm.eh.selector.
-    if (Name.compare("llvm.eh.selector.i64") == 0) {
-      NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_selector);
-      return true;
-    }
-    //  Convert the old llvm.eh.typeid.for.i64 to a call to llvm.eh.typeid.for.
-    if (Name.compare("llvm.eh.typeid.for.i64") == 0) {
-      NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_typeid_for);
-      return true;
-    }
-    break;
-
-  case 'm': {
-    // This upgrades the llvm.memcpy, llvm.memmove, and llvm.memset to the
-    // new format that allows overloading the pointer for different address
-    // space (e.g., llvm.memcpy.i16 => llvm.memcpy.p0i8.p0i8.i16)
-    const char* NewFnName = NULL;
-    if (Name.compare(5,8,"memcpy.i",8) == 0) {
-      if (Name[13] == '8')
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i8";
-      else if (Name.compare(13,2,"16") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i16";
-      else if (Name.compare(13,2,"32") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i32";
-      else if (Name.compare(13,2,"64") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i64";
-    } else if (Name.compare(5,9,"memmove.i",9) == 0) {
-      if (Name[14] == '8')
-        NewFnName = "llvm.memmove.p0i8.p0i8.i8";
-      else if (Name.compare(14,2,"16") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i16";
-      else if (Name.compare(14,2,"32") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i32";
-      else if (Name.compare(14,2,"64") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i64";
-    }
-    else if (Name.compare(5,8,"memset.i",8) == 0) {
-      if (Name[13] == '8')
-        NewFnName = "llvm.memset.p0i8.i8";
-      else if (Name.compare(13,2,"16") == 0)
-        NewFnName = "llvm.memset.p0i8.i16";
-      else if (Name.compare(13,2,"32") == 0)
-        NewFnName = "llvm.memset.p0i8.i32";
-      else if (Name.compare(13,2,"64") == 0)
-        NewFnName = "llvm.memset.p0i8.i64";
-    }
-    if (NewFnName) {
-      NewFn = cast<Function>(M->getOrInsertFunction(NewFnName, 
-                                            FTy->getReturnType(),
-                                            FTy->getParamType(0),
-                                            FTy->getParamType(1),
-                                            FTy->getParamType(2),
-                                            FTy->getParamType(3),
-                                            Type::getInt1Ty(F->getContext()),
-                                            (Type *)0));
-      return true;
-    }
-    break;
-  }
   case 'p':
-    //  This upgrades the llvm.part.select overloaded intrinsic names to only 
-    //  use one type specifier in the name. We only care about the old format
-    //  'llvm.part.select.i*.i*', and solve as above with bswap.
-    if (Name.compare(5,12,"part.select.",12) == 0) {
-      std::string::size_type delim = Name.find('.',17);
-      
-      if (delim != std::string::npos) {
-        //  Construct a new name as 'llvm.part.select' + '.i*'
-        F->setName(Name.substr(0,16)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-      break;
-    }
-
-    //  This upgrades the llvm.part.set intrinsics similarly as above, however 
-    //  we care about 'llvm.part.set.i*.i*.i*', but only the first two types 
-    //  must match. There is an additional type specifier after these two 
-    //  matching types that we must retain when upgrading.  Thus, we require 
-    //  finding 2 periods, not just one, after the intrinsic name.
-    if (Name.compare(5,9,"part.set.",9) == 0) {
-      std::string::size_type delim = Name.find('.',14);
-
-      if (delim != std::string::npos &&
-          Name.find('.',delim+1) != std::string::npos) {
-        //  Construct a new name as 'llvm.part.select' + '.i*.i*'
-        F->setName(Name.substr(0,13)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-      break;
-    }
-
     //  This upgrades the llvm.prefetch intrinsic to accept one more parameter,
     //  which is a instruction / data cache identifier. The old version only
     //  implicitly accepted the data version.
-    if (Name.compare(5,8,"prefetch",8) == 0) {
+    if (Name == "prefetch") {
       // Don't do anything if it has the correct number of arguments already
       if (FTy->getNumParams() == 4)
         break;
@@ -297,8 +53,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       //  its type is incorrect, but we cannot overload that name. We
       //  arbitrarily unique it here allowing us to construct a correctly named
       //  and typed function below.
+      std::string NameTmp = F->getName();
       F->setName("");
-      NewFn = cast<Function>(M->getOrInsertFunction(Name,
+      NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
                                                     FTy->getReturnType(),
                                                     FTy->getParamType(0),
                                                     FTy->getParamType(1),
@@ -309,301 +66,39 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     }
 
     break;
-  case 'x':
-    // This fixes the poorly named crc32 intrinsics
-    if (Name.compare(5, 13, "x86.sse42.crc", 13) == 0) {
-      const char* NewFnName = NULL;
-      if (Name.compare(18, 2, "32", 2) == 0) {
-        if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) {
-          NewFnName = "llvm.x86.sse42.crc32.32.8";
-        } else if (Name.compare(20, 3, ".16") == 0 && Name.length() == 23) {
-          NewFnName = "llvm.x86.sse42.crc32.32.16";
-        } else if (Name.compare(20, 3, ".32") == 0 && Name.length() == 23) {
-          NewFnName = "llvm.x86.sse42.crc32.32.32";
-        }
-      }
-      else if (Name.compare(18, 2, "64", 2) == 0) {
-        if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) {
-          NewFnName = "llvm.x86.sse42.crc32.64.8";
-        } else if (Name.compare(20, 3, ".64") == 0 && Name.length() == 23) {
-          NewFnName = "llvm.x86.sse42.crc32.64.64";
-        }
-      }
-      if (NewFnName) {
-        F->setName(NewFnName);
-        NewFn = F;
-        return true;
-      }
+  case 'x': {
+    const char *NewFnName = NULL;
+    // This fixes the poorly named crc32 intrinsics.
+    if (Name == "x86.sse42.crc32.8")
+      NewFnName = "llvm.x86.sse42.crc32.32.8";
+    else if (Name == "x86.sse42.crc32.16")
+      NewFnName = "llvm.x86.sse42.crc32.32.16";
+    else if (Name == "x86.sse42.crc32.32")
+      NewFnName = "llvm.x86.sse42.crc32.32.32";
+    else if (Name == "x86.sse42.crc64.8")
+      NewFnName = "llvm.x86.sse42.crc32.64.8";
+    else if (Name == "x86.sse42.crc64.64")
+      NewFnName = "llvm.x86.sse42.crc32.64.64";
+    
+    if (NewFnName) {
+      F->setName(NewFnName);
+      NewFn = F;
+      return true;
     }
 
-    // This fixes all MMX shift intrinsic instructions to take a
-    // x86_mmx instead of a v1i64, v2i32, v4i16, or v8i8.
-    if (Name.compare(5, 8, "x86.mmx.", 8) == 0) {
-      const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
-
-      if (Name.compare(13, 4, "padd", 4) == 0   ||
-          Name.compare(13, 4, "psub", 4) == 0   ||
-          Name.compare(13, 4, "pmul", 4) == 0   ||
-          Name.compare(13, 5, "pmadd", 5) == 0  ||
-          Name.compare(13, 4, "pand", 4) == 0   ||
-          Name.compare(13, 3, "por", 3) == 0    ||
-          Name.compare(13, 4, "pxor", 4) == 0   ||
-          Name.compare(13, 4, "pavg", 4) == 0   ||
-          Name.compare(13, 4, "pmax", 4) == 0   ||
-          Name.compare(13, 4, "pmin", 4) == 0   ||
-          Name.compare(13, 4, "psad", 4) == 0   ||
-          Name.compare(13, 4, "psll", 4) == 0   ||
-          Name.compare(13, 4, "psrl", 4) == 0   ||
-          Name.compare(13, 4, "psra", 4) == 0   ||
-          Name.compare(13, 4, "pack", 4) == 0   ||
-          Name.compare(13, 6, "punpck", 6) == 0 ||
-          Name.compare(13, 4, "pcmp", 4) == 0) {
-        assert(FTy->getNumParams() == 2 && "MMX intrinsic takes 2 args!");
-        const Type *SecondParamTy = X86_MMXTy;
-
-        if (Name.compare(13, 5, "pslli", 5) == 0 ||
-            Name.compare(13, 5, "psrli", 5) == 0 ||
-            Name.compare(13, 5, "psrai", 5) == 0)
-          SecondParamTy = FTy->getParamType(1);
-
-        // Don't do anything if it has the correct types.
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == SecondParamTy)
-          break;
-
-        // We first need to change the name of the old (bad) intrinsic, because
-        // its type is incorrect, but we cannot overload that name. We
-        // arbitrarily unique it here allowing us to construct a correctly named
-        // and typed function below.
-        F->setName("");
-
-        // Now construct the new intrinsic with the correct name and type. We
-        // leave the old function around in order to query its type, whatever it
-        // may be, and correctly convert up to the new type.
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy, X86_MMXTy,
-                                                      SecondParamTy, (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "maskmovq", 8) == 0) {
-        // Don't do anything if it has the correct types.
-        if (FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "pmovmskb", 8) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "movnt", 5) == 0) {
-        if (FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      FTy->getParamType(0),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 7, "palignr", 7) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "pextr", 5) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "pinsr", 5) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 12, "cvtsi32.si64", 12) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(0),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 12, "cvtsi64.si32", 12) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "vec.init", 8) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy)
-          break;
-
-        F->setName("");
-
-        if (Name.compare(21, 2, ".b", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        FTy->getParamType(2),
-                                                        FTy->getParamType(3),
-                                                        FTy->getParamType(4),
-                                                        FTy->getParamType(5),
-                                                        FTy->getParamType(6),
-                                                        FTy->getParamType(7),
-                                                        (Type*)0));
-        else if (Name.compare(21, 2, ".w", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        FTy->getParamType(2),
-                                                        FTy->getParamType(3),
-                                                        (Type*)0));
-        else if (Name.compare(21, 2, ".d", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        (Type*)0));
-        return true;
-      }
-
-
-      if (Name.compare(13, 9, "vec.ext.d", 9) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 9, "emms", 4) == 0 ||
-          Name.compare(13, 9, "femms", 5) == 0) {
-        NewFn = 0;
-        break;
-      }
-
-      // We really shouldn't get here ever.
-      assert(0 && "Invalid MMX intrinsic!");
-      break;
-    } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
-               Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
-               Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
-               Name.compare(5,15,"x86.sse2.movs.d",15) == 0 ||
-               Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 ||
-               Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 ||
-               Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ||
-               Name.compare(5,20,"x86.sse2.punpckh.qdq",20) == 0 ||
-               Name.compare(5,20,"x86.sse2.punpckl.qdq",20) == 0) {
-      // Calls to these intrinsics are transformed into ShuffleVector's.
-      NewFn = 0;
-      return true;
-    } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
-      // Calls to these intrinsics are transformed into vector multiplies.
-      NewFn = 0;
-      return true;
-    } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 ||
-               Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) {
-      // Calls to these intrinsics are transformed into vector shuffles, shifts,
-      // or 0.
-      NewFn = 0;
-      return true;           
-    } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 ||
-               Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) {
-      // Calls to these instructions are transformed into unaligned loads.
-      NewFn = 0;
+    // Calls to these instructions are transformed into unaligned loads.
+    if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
+        Name == "x86.sse2.loadu.pd")
       return true;
-    } else if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) {
-      // Calls to these instructions are transformed into nontemporal stores.
-      NewFn = 0;
-      return true;
-    } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
-      // This is an SSE/MMX instruction.
-      const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
-      NewFn =
-        cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w",
-                                              X86_MMXTy,
-                                              X86_MMXTy,
-                                              Type::getInt8Ty(F->getContext()),
-                                              (Type*)0));
+      
+    // Calls to these instructions are transformed into nontemporal stores.
+    if (Name == "x86.sse.movnt.ps"  || Name == "x86.sse2.movnt.dq" ||
+        Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
       return true;
-    }
 
     break;
   }
+  }
 
   //  This may not belong here. This function is effectively being overloaded 
   //  to both detect an intrinsic which needs upgrading, and to provide the 
@@ -625,105 +120,10 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
 }
 
 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
-  StringRef Name(GV->getName());
-
-  // We are only upgrading one symbol here.
-  if (Name == ".llvm.eh.catch.all.value") {
-    GV->setName("llvm.eh.catch.all.value");
-    return true;
-  }
-
+  // Nothing to do yet.
   return false;
 }
 
-/// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results
-/// have vector elements twice as big as one or both source operands, do the
-/// sign- or zero-extension that used to be handled by intrinsics.  The
-/// extended values are returned via V0 and V1.
-static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1,
-                           Value *&V0, Value *&V1) {
-  Function *F = CI->getCalledFunction();
-  const std::string& Name = F->getName();
-  bool isLong = (Name.at(18) == 'l');
-  bool isSigned = (Name.at(19) == 's');
-
-  if (isSigned) {
-    if (isLong)
-      V0 = new SExtInst(Arg0, CI->getType(), "", CI);
-    else
-      V0 = Arg0;
-    V1 = new SExtInst(Arg1, CI->getType(), "", CI);
-  } else {
-    if (isLong)
-      V0 = new ZExtInst(Arg0, CI->getType(), "", CI);
-    else
-      V0 = Arg0;
-    V1 = new ZExtInst(Arg1, CI->getType(), "", CI);
-  }
-}
-
-/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba,
-/// or vabal intrinsics, construct a call to a vabd intrinsic.  Examine the
-/// name of the old intrinsic to determine whether to use a signed or unsigned
-/// vabd intrinsic.  Get the type from the old call instruction, adjusted for
-/// half-size vector elements if the old intrinsic was vabdl or vabal.
-static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
-  Function *F = CI->getCalledFunction();
-  const std::string& Name = F->getName();
-  bool isLong = (Name.at(18) == 'l');
-  bool isSigned = (Name.at(isLong ? 19 : 18) == 's');
-
-  Intrinsic::ID intID;
-  if (isSigned)
-    intID = Intrinsic::arm_neon_vabds;
-  else
-    intID = Intrinsic::arm_neon_vabdu;
-
-  const Type *Ty = CI->getType();
-  if (isLong)
-    Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty));
-
-  Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1);
-  Value *Operands[2];
-  Operands[0] = Arg0;
-  Operands[1] = Arg1;
-  return CallInst::Create(VABD, Operands, Operands+2, 
-                          "upgraded."+CI->getName(), CI);
-}
-
-/// ConstructNewCallInst - Construct a new CallInst with the signature of NewFn.
-static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI,
-                                 Value **Operands, unsigned NumOps,
-                                 bool AssignName = true) {
-  // Construct a new CallInst.
-  CallInst *NewCI =
-    CallInst::Create(NewFn, Operands, Operands + NumOps,
-                     AssignName ? "upgraded." + OldCI->getName() : "", OldCI);
-
-  NewCI->setTailCall(OldCI->isTailCall());
-  NewCI->setCallingConv(OldCI->getCallingConv());
-
-  // Handle any uses of the old CallInst. If the type has changed, add a cast.
-  if (!OldCI->use_empty()) {
-    if (OldCI->getType() != NewCI->getType()) {
-      Function *OldFn = OldCI->getCalledFunction();
-      CastInst *RetCast =
-        CastInst::Create(CastInst::getCastOpcode(NewCI, true,
-                                                 OldFn->getReturnType(), true),
-                         NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI);
-
-      // Replace all uses of the old call with the new cast which has the
-      // correct type.
-      OldCI->replaceAllUsesWith(RetCast);
-    } else {
-      OldCI->replaceAllUsesWith(NewCI);
-    }
-  }
-
-  // Clean up the old call now that it has been completely upgraded.
-  OldCI->eraseFromParent();
-}
-
 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 
 // upgraded intrinsic. All argument and return casting must be provided in 
 // order to seamlessly integrate with existing context.
@@ -735,284 +135,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   assert(F && "CallInst has no function associated with it.");
 
   if (!NewFn) {
-    // Get the Function's name.
-    const std::string& Name = F->getName();
-
-    // Upgrade ARM NEON intrinsics.
-    if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
-      Instruction *NewI;
-      Value *V0, *V1;
-      if (Name.compare(14, 7, "vmovls.", 7) == 0) {
-        NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
-                            "upgraded." + CI->getName(), CI);
-      } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
-        NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
-                            "upgraded." + CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vadd", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vsub", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
-      } else if (Name.compare(14, 4, "vmul", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
-      } else if (Name.compare(14, 4, "vmla", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
-        Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
-        NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vmls", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
-        Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
-        NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vabd", 4) == 0) {
-        NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
-        NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vaba", 4) == 0) {
-        NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
-        if (Name.at(18) == 'l')
-          NewI = new ZExtInst(NewI, CI->getType(), "", CI);
-        NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
-        NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
-                             "upgraded." + CI->getName(), CI);
-      } else {
-        llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
-      }
-      // Replace any uses of the old CallInst.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(NewI);
-      CI->eraseFromParent();
-      return;
-    }
-
-    bool isLoadH = false, isLoadL = false, isMovL = false;
-    bool isMovSD = false, isShufPD = false;
-    bool isUnpckhPD = false, isUnpcklPD = false;
-    bool isPunpckhQPD = false, isPunpcklQPD = false;
-    if (F->getName() == "llvm.x86.sse2.loadh.pd")
-      isLoadH = true;
-    else if (F->getName() == "llvm.x86.sse2.loadl.pd")
-      isLoadL = true;
-    else if (F->getName() == "llvm.x86.sse2.movl.dq")
-      isMovL = true;
-    else if (F->getName() == "llvm.x86.sse2.movs.d")
-      isMovSD = true;
-    else if (F->getName() == "llvm.x86.sse2.shuf.pd")
-      isShufPD = true;
-    else if (F->getName() == "llvm.x86.sse2.unpckh.pd")
-      isUnpckhPD = true;
-    else if (F->getName() == "llvm.x86.sse2.unpckl.pd")
-      isUnpcklPD = true;
-    else if (F->getName() ==  "llvm.x86.sse2.punpckh.qdq")
-      isPunpckhQPD = true;
-    else if (F->getName() ==  "llvm.x86.sse2.punpckl.qdq")
-      isPunpcklQPD = true;
-
-    if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
-        isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-      std::vector<Constant*> Idxs;
-      Value *Op0 = CI->getArgOperand(0);
-      ShuffleVectorInst *SI = NULL;
-      if (isLoadH || isLoadL) {
-        Value *Op1 = UndefValue::get(Op0->getType());
-        Value *Addr = new BitCastInst(CI->getArgOperand(1), 
-                                  Type::getDoublePtrTy(C),
-                                      "upgraded.", CI);
-        Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
-        Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0);
-        Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI);
-
-        if (isLoadH) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        } else {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-        }
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      } else if (isMovL) {
-        Constant *Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Value *ZeroV = ConstantVector::get(Idxs);
-
-        Idxs.clear(); 
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 4));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 5));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
-      } else if (isMovSD ||
-                 isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-        Value *Op1 = CI->getArgOperand(1);
-        if (isMovSD) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-        } else if (isUnpckhPD || isPunpckhQPD) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
-        } else {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        }
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      } else if (isShufPD) {
-        Value *Op1 = CI->getArgOperand(1);
-        unsigned MaskVal =
-                        cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
-                                               ((MaskVal >> 1) & 1)+2));
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      }
-
-      assert(SI && "Unexpected!");
-
-      // Handle any uses of the old CallInst.
-      if (!CI->use_empty())
-        //  Replace all uses of the old call with the new cast which has the 
-        //  correct type.
-        CI->replaceAllUsesWith(SI);
-      
-      //  Clean up the old call now that it has been completely upgraded.
-      CI->eraseFromParent();
-    } else if (F->getName() == "llvm.x86.sse41.pmulld") {
-      // Upgrade this set of intrinsics into vector multiplies.
-      Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
-                                                   CI->getArgOperand(1),
-                                                   CI->getName(),
-                                                   CI);
-      // Fix up all the uses with our new multiply.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Mul);
-        
-      // Remove upgraded multiply.
-      CI->eraseFromParent();
-    } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
-      Value *Op1 = CI->getArgOperand(0);
-      Value *Op2 = CI->getArgOperand(1);
-      Value *Op3 = CI->getArgOperand(2);
-      unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
-      Value *Rep;
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      // If palignr is shifting the pair of input vectors less than 9 bytes,
-      // emit a shuffle instruction.
-      if (shiftVal <= 8) {
-        const Type *IntTy = Type::getInt32Ty(C);
-        const Type *EltTy = Type::getInt8Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 8);
-        
-        Op2 = Builder.CreateBitCast(Op2, VecTy);
-        Op1 = Builder.CreateBitCast(Op1, VecTy);
-
-        llvm::SmallVector<llvm::Constant*, 8> Indices;
-        for (unsigned i = 0; i != 8; ++i)
-          Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
-        Value *SV = ConstantVector::get(Indices);
-        Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
-        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
-      }
-
-      // If palignr is shifting the pair of input vectors more than 8 but less
-      // than 16 bytes, emit a logical right shift of the destination.
-      else if (shiftVal < 16) {
-        // MMX has these as 1 x i64 vectors for some odd optimization reasons.
-        const Type *EltTy = Type::getInt64Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 1);
-
-        Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
-        Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8);
-
-        // create i32 constant
-        Function *I =
-          Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q);
-        Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
-      }
-
-      // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-      else {
-        Rep = Constant::getNullValue(F->getReturnType());
-      }
-      
-      // Replace any uses with our new instruction.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Rep);
-        
-      // Remove upgraded instruction.
-      CI->eraseFromParent();
-      
-    } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
-      Value *Op1 = CI->getArgOperand(0);
-      Value *Op2 = CI->getArgOperand(1);
-      Value *Op3 = CI->getArgOperand(2);
-      unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
-      Value *Rep;
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      // If palignr is shifting the pair of input vectors less than 17 bytes,
-      // emit a shuffle instruction.
-      if (shiftVal <= 16) {
-        const Type *IntTy = Type::getInt32Ty(C);
-        const Type *EltTy = Type::getInt8Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 16);
-        
-        Op2 = Builder.CreateBitCast(Op2, VecTy);
-        Op1 = Builder.CreateBitCast(Op1, VecTy);
-
-        llvm::SmallVector<llvm::Constant*, 16> Indices;
-        for (unsigned i = 0; i != 16; ++i)
-          Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
-        Value *SV = ConstantVector::get(Indices);
-        Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
-        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
-      }
-
-      // If palignr is shifting the pair of input vectors more than 16 but less
-      // than 32 bytes, emit a logical right shift of the destination.
-      else if (shiftVal < 32) {
-        const Type *EltTy = Type::getInt64Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 2);
-        const Type *IntTy = Type::getInt32Ty(C);
-
-        Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
-        Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8);
-
-        // create i32 constant
-        Function *I =
-          Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq);
-        Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
-      }
-
-      // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-      else {
-        Rep = Constant::getNullValue(F->getReturnType());
-      }
-      
-      // Replace any uses with our new instruction.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Rep);
-        
-      // Remove upgraded instruction.
-      CI->eraseFromParent();
-    
-    } else if (F->getName() == "llvm.x86.sse.loadu.ps" ||
-               F->getName() == "llvm.x86.sse2.loadu.dq" ||
-               F->getName() == "llvm.x86.sse2.loadu.pd") {
+    if (F->getName() == "llvm.x86.sse.loadu.ps" ||
+        F->getName() == "llvm.x86.sse2.loadu.dq" ||
+        F->getName() == "llvm.x86.sse2.loadu.pd") {
       // Convert to a native, unaligned load.
       const Type *VecTy = CI->getType();
       const Type *IntTy = IntegerType::get(C, 128);
@@ -1064,310 +189,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   }
 
   switch (NewFn->getIntrinsicID()) {
-  default: llvm_unreachable("Unknown function for CallInst upgrade.");
-  case Intrinsic::arm_neon_vld1:
-  case Intrinsic::arm_neon_vld2:
-  case Intrinsic::arm_neon_vld3:
-  case Intrinsic::arm_neon_vld4:
-  case Intrinsic::arm_neon_vst1:
-  case Intrinsic::arm_neon_vst2:
-  case Intrinsic::arm_neon_vst3:
-  case Intrinsic::arm_neon_vst4:
-  case Intrinsic::arm_neon_vld2lane:
-  case Intrinsic::arm_neon_vld3lane:
-  case Intrinsic::arm_neon_vld4lane:
-  case Intrinsic::arm_neon_vst2lane:
-  case Intrinsic::arm_neon_vst3lane:
-  case Intrinsic::arm_neon_vst4lane: {
-    // Add a default alignment argument of 1.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-    Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty())
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(NewCI);
-    
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
-    break;
-  }        
-
-  case Intrinsic::x86_mmx_padd_b:
-  case Intrinsic::x86_mmx_padd_w:
-  case Intrinsic::x86_mmx_padd_d:
-  case Intrinsic::x86_mmx_padd_q:
-  case Intrinsic::x86_mmx_padds_b:
-  case Intrinsic::x86_mmx_padds_w:
-  case Intrinsic::x86_mmx_paddus_b:
-  case Intrinsic::x86_mmx_paddus_w:
-  case Intrinsic::x86_mmx_psub_b:
-  case Intrinsic::x86_mmx_psub_w:
-  case Intrinsic::x86_mmx_psub_d:
-  case Intrinsic::x86_mmx_psub_q:
-  case Intrinsic::x86_mmx_psubs_b:
-  case Intrinsic::x86_mmx_psubs_w:
-  case Intrinsic::x86_mmx_psubus_b:
-  case Intrinsic::x86_mmx_psubus_w:
-  case Intrinsic::x86_mmx_pmulh_w:
-  case Intrinsic::x86_mmx_pmull_w:
-  case Intrinsic::x86_mmx_pmulhu_w:
-  case Intrinsic::x86_mmx_pmulu_dq:
-  case Intrinsic::x86_mmx_pmadd_wd:
-  case Intrinsic::x86_mmx_pand:
-  case Intrinsic::x86_mmx_pandn:
-  case Intrinsic::x86_mmx_por:
-  case Intrinsic::x86_mmx_pxor:
-  case Intrinsic::x86_mmx_pavg_b:
-  case Intrinsic::x86_mmx_pavg_w:
-  case Intrinsic::x86_mmx_pmaxu_b:
-  case Intrinsic::x86_mmx_pmaxs_w:
-  case Intrinsic::x86_mmx_pminu_b:
-  case Intrinsic::x86_mmx_pmins_w:
-  case Intrinsic::x86_mmx_psad_bw:
-  case Intrinsic::x86_mmx_psll_w:
-  case Intrinsic::x86_mmx_psll_d:
-  case Intrinsic::x86_mmx_psll_q:
-  case Intrinsic::x86_mmx_pslli_w:
-  case Intrinsic::x86_mmx_pslli_d:
-  case Intrinsic::x86_mmx_pslli_q:
-  case Intrinsic::x86_mmx_psrl_w:
-  case Intrinsic::x86_mmx_psrl_d:
-  case Intrinsic::x86_mmx_psrl_q:
-  case Intrinsic::x86_mmx_psrli_w:
-  case Intrinsic::x86_mmx_psrli_d:
-  case Intrinsic::x86_mmx_psrli_q:
-  case Intrinsic::x86_mmx_psra_w:
-  case Intrinsic::x86_mmx_psra_d:
-  case Intrinsic::x86_mmx_psrai_w:
-  case Intrinsic::x86_mmx_psrai_d:
-  case Intrinsic::x86_mmx_packsswb:
-  case Intrinsic::x86_mmx_packssdw:
-  case Intrinsic::x86_mmx_packuswb:
-  case Intrinsic::x86_mmx_punpckhbw:
-  case Intrinsic::x86_mmx_punpckhwd:
-  case Intrinsic::x86_mmx_punpckhdq:
-  case Intrinsic::x86_mmx_punpcklbw:
-  case Intrinsic::x86_mmx_punpcklwd:
-  case Intrinsic::x86_mmx_punpckldq:
-  case Intrinsic::x86_mmx_pcmpeq_b:
-  case Intrinsic::x86_mmx_pcmpeq_w:
-  case Intrinsic::x86_mmx_pcmpeq_d:
-  case Intrinsic::x86_mmx_pcmpgt_b:
-  case Intrinsic::x86_mmx_pcmpgt_w:
-  case Intrinsic::x86_mmx_pcmpgt_d: {
-    Value *Operands[2];
-    
-    // Cast the operand to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-
-    switch (NewFn->getIntrinsicID()) {
-    default:
-      // Cast to the X86 MMX type.
-      Operands[1] = new BitCastInst(CI->getArgOperand(1), 
-                                    NewFn->getFunctionType()->getParamType(1),
-                                    "upgraded.", CI);
-      break;
-    case Intrinsic::x86_mmx_pslli_w:
-    case Intrinsic::x86_mmx_pslli_d:
-    case Intrinsic::x86_mmx_pslli_q:
-    case Intrinsic::x86_mmx_psrli_w:
-    case Intrinsic::x86_mmx_psrli_d:
-    case Intrinsic::x86_mmx_psrli_q:
-    case Intrinsic::x86_mmx_psrai_w:
-    case Intrinsic::x86_mmx_psrai_d:
-      // These take an i32 as their second parameter.
-      Operands[1] = CI->getArgOperand(1);
-      break;
-    }
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-  case Intrinsic::x86_mmx_maskmovq: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = new BitCastInst(CI->getArgOperand(1), 
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3, false);
-    break;
-  }
-  case Intrinsic::x86_mmx_pmovmskb: {
-    Value *Operands[1];
-
-    // Cast the operand to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 1);
-    break;
-  }
-  case Intrinsic::x86_mmx_movnt_dq: {
-    Value *Operands[2];
-
-    Operands[0] = CI->getArgOperand(0);
-
-    // Cast the operand to the X86 MMX type.
-    Operands[1] = new BitCastInst(CI->getArgOperand(1),
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2, false);
-    break;
-  }
-  case Intrinsic::x86_mmx_palignr_b: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = new BitCastInst(CI->getArgOperand(1),
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3);
-    break;
-  }
-  case Intrinsic::x86_mmx_pextr_w: {
-    Value *Operands[2];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = CI->getArgOperand(1);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-  case Intrinsic::x86_mmx_pinsr_w: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = CI->getArgOperand(1);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3);
-    break;
-  }
-  case Intrinsic::x86_sse_pshuf_w: {
-    IRBuilder<> Builder(C);
-    Builder.SetInsertPoint(CI->getParent(), CI);
-
-    // Cast the operand to the X86 MMX type.
-    Value *Operands[2];
-    Operands[0] =
-      Builder.CreateBitCast(CI->getArgOperand(0), 
-                            NewFn->getFunctionType()->getParamType(0),
-                            "upgraded.");
-    Operands[1] =
-      Builder.CreateTrunc(CI->getArgOperand(1),
-                          Type::getInt8Ty(C),
-                          "upgraded.");
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-
-  case Intrinsic::ctlz:
-  case Intrinsic::ctpop:
-  case Intrinsic::cttz: {
-    //  Build a small vector of the original arguments.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-
-    //  Construct a new CallInst
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       "upgraded."+CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty()) {
-      //  Check for sign extend parameter attributes on the return values.
-      bool SrcSExt = NewFn->getAttributes().paramHasAttr(0, Attribute::SExt);
-      bool DestSExt = F->getAttributes().paramHasAttr(0, Attribute::SExt);
-      
-      //  Construct an appropriate cast from the new return type to the old.
-      CastInst *RetCast = CastInst::Create(
-                            CastInst::getCastOpcode(NewCI, SrcSExt,
-                                                    F->getReturnType(),
-                                                    DestSExt),
-                            NewCI, F->getReturnType(),
-                            NewCI->getName(), CI);
-      NewCI->moveBefore(RetCast);
-
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(RetCast);
-    }
-
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
-  }
-  break;
-  case Intrinsic::eh_selector:
-  case Intrinsic::eh_typeid_for: {
-    // Only the return type changed.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       "upgraded." + CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty()) {
-      //  Construct an appropriate cast from the new return type to the old.
-      CastInst *RetCast =
-        CastInst::Create(CastInst::getCastOpcode(NewCI, true,
-                                                 F->getReturnType(), true),
-                         NewCI, F->getReturnType(), NewCI->getName(), CI);
-      CI->replaceAllUsesWith(RetCast);
-    }
-    CI->eraseFromParent();
-  }
-  break;
-  case Intrinsic::memcpy:
-  case Intrinsic::memmove:
-  case Intrinsic::memset: {
-    // Add isVolatile
-    const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
-    Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
-                           CI->getArgOperand(2), CI->getArgOperand(3),
-                           llvm::ConstantInt::get(I1Ty, 0) };
-    CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
-                                       CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty())
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(NewCI);
-    
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
-    break;
-  }
   case Intrinsic::prefetch: {
     IRBuilder<> Builder(C);
     Builder.SetInsertPoint(CI->getParent(), CI);
@@ -1401,13 +222,13 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
 
   // Upgrade the function and check if it is a totaly new function.
-  Function* NewFn;
+  Function *NewFn;
   if (UpgradeIntrinsicFunction(F, NewFn)) {
     if (NewFn != F) {
       // Replace all uses to the old function with the new one if necessary.
       for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
            UI != UE; ) {
-        if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+        if (CallInst *CI = dyn_cast<CallInst>(*UI++))
           UpgradeIntrinsicCall(CI, NewFn);
       }
       // Remove old function, no longer used, from the module.
@@ -1420,37 +241,27 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
 /// If an llvm.dbg.declare intrinsic is invalid, then this function simply
 /// strips that use.
 void llvm::CheckDebugInfoIntrinsics(Module *M) {
-
-
   if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
-    while (!FuncStart->use_empty()) {
-      CallInst *CI = cast<CallInst>(FuncStart->use_back());
-      CI->eraseFromParent();
-    }
+    while (!FuncStart->use_empty())
+      cast<CallInst>(FuncStart->use_back())->eraseFromParent();
     FuncStart->eraseFromParent();
   }
   
   if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
-    while (!StopPoint->use_empty()) {
-      CallInst *CI = cast<CallInst>(StopPoint->use_back());
-      CI->eraseFromParent();
-    }
+    while (!StopPoint->use_empty())
+      cast<CallInst>(StopPoint->use_back())->eraseFromParent();
     StopPoint->eraseFromParent();
   }
 
   if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
-    while (!RegionStart->use_empty()) {
-      CallInst *CI = cast<CallInst>(RegionStart->use_back());
-      CI->eraseFromParent();
-    }
+    while (!RegionStart->use_empty())
+      cast<CallInst>(RegionStart->use_back())->eraseFromParent();
     RegionStart->eraseFromParent();
   }
 
   if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
-    while (!RegionEnd->use_empty()) {
-      CallInst *CI = cast<CallInst>(RegionEnd->use_back());
-      CI->eraseFromParent();
-    }
+    while (!RegionEnd->use_empty())
+      cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
     RegionEnd->eraseFromParent();
   }
   
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 955a028..7d47044 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -227,8 +227,8 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
 
       // If the PHI _HAD_ two uses, replace PHI node with its now *single* value
       if (max_idx == 2) {
-        if (PN->getOperand(0) != PN)
-          PN->replaceAllUsesWith(PN->getOperand(0));
+        if (PN->getIncomingValue(0) != PN)
+          PN->replaceAllUsesWith(PN->getIncomingValue(0));
         else
           // We are left with an infinite loop with no entries: kill the PHI.
           PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
@@ -308,3 +308,19 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
   return New;
 }
 
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+  TerminatorInst *TI = getTerminator();
+  if (!TI)
+    // Cope with being called on a BasicBlock that doesn't have a terminator
+    // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
+    return;
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+    BasicBlock *Succ = TI->getSuccessor(i);
+    for (iterator II = Succ->begin(); PHINode *PN = dyn_cast<PHINode>(II);
+         ++II) {
+      int i;
+      while ((i = PN->getBasicBlockIndex(this)) >= 0)
+        PN->setIncomingBlock(i, New);
+    }
+  }
+}
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 9985ada..579d356 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -559,7 +559,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
       for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
         res.push_back(ConstantExpr::getCast(opc,
                                             CV->getOperand(i), DstEltTy));
-      return ConstantVector::get(DestVecTy, res);
+      return ConstantVector::get(res);
     }
 
   // We actually have to do a cast now. Perform the cast according to the
@@ -942,7 +942,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
     }
     
     if (const StructType* ST = dyn_cast<StructType>(AggTy))
-      return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+      return ConstantStruct::get(ST, Ops);
     return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
   }
   
@@ -973,7 +973,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
     }
     
     if (const StructType *ST = dyn_cast<StructType>(AggTy))
-      return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+      return ConstantStruct::get(ST, Ops);
     return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
   }
   
@@ -988,7 +988,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
     }
     
     if (const StructType* ST = dyn_cast<StructType>(Agg->getType()))
-      return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+      return ConstantStruct::get(ST, Ops);
     return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
   }
 
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 15d7793..87f2fe6 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -571,8 +571,7 @@ ConstantArray::ConstantArray(const ArrayType *T,
   }
 }
 
-Constant *ConstantArray::get(const ArrayType *Ty, 
-                             const std::vector<Constant*> &V) {
+Constant *ConstantArray::get(const ArrayType *Ty, ArrayRef<Constant*> V) {
   for (unsigned i = 0, e = V.size(); i != e; ++i) {
     assert(V[i]->getType() == Ty->getElementType() &&
            "Wrong type in array element initializer");
@@ -592,13 +591,6 @@ Constant *ConstantArray::get(const ArrayType *Ty,
   return ConstantAggregateZero::get(Ty);
 }
 
-
-Constant *ConstantArray::get(const ArrayType* T, Constant *const* Vals,
-                             unsigned NumVals) {
-  // FIXME: make this the primary ctor method.
-  return get(T, std::vector<Constant*>(Vals, Vals+NumVals));
-}
-
 /// ConstantArray::get(const string&) - Return an array that is initialized to
 /// contain the specified string.  If length is zero then a null terminator is 
 /// added to the specified string so that it may be used in a natural way. 
@@ -621,6 +613,27 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
   return get(ATy, ElementVals);
 }
 
+/// getTypeForElements - Return an anonymous struct type to use for a constant
+/// with the specified set of elements.  The list must not be empty.
+StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
+                                               ArrayRef<Constant*> V,
+                                               bool Packed) {
+  SmallVector<const Type*, 16> EltTypes;
+  for (unsigned i = 0, e = V.size(); i != e; ++i)
+    EltTypes.push_back(V[i]->getType());
+  
+  return StructType::get(Context, EltTypes, Packed);
+}
+
+
+StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
+                                               bool Packed) {
+  assert(!V.empty() &&
+         "ConstantStruct::getTypeForElements cannot be called on empty list");
+  return getTypeForElements(V[0]->getContext(), V, Packed);
+}
+
+
 ConstantStruct::ConstantStruct(const StructType *T,
                                const std::vector<Constant*> &V)
   : Constant(T, ConstantStructVal,
@@ -639,45 +652,26 @@ ConstantStruct::ConstantStruct(const StructType *T,
 }
 
 // ConstantStruct accessors.
-Constant *ConstantStruct::get(const StructType* T,
-                              const std::vector<Constant*>& V) {
-  LLVMContextImpl* pImpl = T->getContext().pImpl;
+Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) {
+  assert(ST->getNumElements() == V.size() &&
+         "Incorrect # elements specified to ConstantStruct::get");
   
-  // Create a ConstantAggregateZero value if all elements are zeros...
+  // Create a ConstantAggregateZero value if all elements are zeros.
   for (unsigned i = 0, e = V.size(); i != e; ++i)
     if (!V[i]->isNullValue())
-      return pImpl->StructConstants.getOrCreate(T, V);
+      return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
 
-  return ConstantAggregateZero::get(T);
+  return ConstantAggregateZero::get(ST);
 }
 
-Constant *ConstantStruct::get(LLVMContext &Context,
-                              const std::vector<Constant*>& V, bool packed) {
-  std::vector<const Type*> StructEls;
-  StructEls.reserve(V.size());
-  for (unsigned i = 0, e = V.size(); i != e; ++i)
-    StructEls.push_back(V[i]->getType());
-  return get(StructType::get(Context, StructEls, packed), V);
-}
-
-Constant *ConstantStruct::get(LLVMContext &Context,
-                              Constant *const *Vals, unsigned NumVals,
-                              bool Packed) {
-  // FIXME: make this the primary ctor method.
-  return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
-}
-
-Constant* ConstantStruct::get(LLVMContext &Context, bool Packed,
-                              Constant * Val, ...) {
+Constant* ConstantStruct::get(const StructType *T, ...) {
   va_list ap;
-  std::vector<Constant*> Values;
-  va_start(ap, Val);
-  while (Val) {
+  SmallVector<Constant*, 8> Values;
+  va_start(ap, T);
+  while (Constant *Val = va_arg(ap, llvm::Constant*))
     Values.push_back(Val);
-    Val = va_arg(ap, llvm::Constant*);
-  }
   va_end(ap);
-  return get(Context, Values, Packed);
+  return get(T, Values);
 }
 
 ConstantVector::ConstantVector(const VectorType *T,
@@ -696,9 +690,9 @@ ConstantVector::ConstantVector(const VectorType *T,
 }
 
 // ConstantVector accessors.
-Constant *ConstantVector::get(const VectorType *T,
-                              const std::vector<Constant*> &V) {
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
   assert(!V.empty() && "Vectors can't be empty");
+  const VectorType *T = VectorType::get(V.front()->getType(), V.size());
   LLVMContextImpl *pImpl = T->getContext().pImpl;
 
   // If this is an all-undef or all-zero vector, return a
@@ -723,12 +717,6 @@ Constant *ConstantVector::get(const VectorType *T,
   return pImpl->VectorConstants.getOrCreate(T, V);
 }
 
-Constant *ConstantVector::get(ArrayRef<Constant*> V) {
-  // FIXME: make this the primary ctor method.
-  assert(!V.empty() && "Vectors cannot be empty");
-  return get(VectorType::get(V.front()->getType(), V.size()), V.vec());
-}
-
 // Utility function for determining if a ConstantExpr is a CastOp or not. This
 // can't be inline because we don't want to #include Instruction.h into
 // Constant.h
@@ -1537,8 +1525,8 @@ Constant *ConstantExpr::getSizeOf(const Type* Ty) {
 Constant *ConstantExpr::getAlignOf(const Type* Ty) {
   // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
   // Note that a non-inbounds gep is used, as null isn't within any object.
-  const Type *AligningTy = StructType::get(Ty->getContext(),
-                                   Type::getInt1Ty(Ty->getContext()), Ty, NULL);
+  const Type *AligningTy = 
+    StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
   Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
   Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
   Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
@@ -2116,7 +2104,7 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
     Values.push_back(Val);
   }
   
-  Constant *Replacement = get(cast<VectorType>(getRawType()), Values);
+  Constant *Replacement = get(Values);
   assert(Replacement != this && "I didn't contain From!");
   
   // Everyone using this now uses the replacement.
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 1395754..ea6ebe9 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -568,7 +568,7 @@ struct ConstantKeyData<InlineAsm> {
   }
 };
 
-template<class ValType, class TypeClass, class ConstantClass,
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
          bool HasLargeKey = false /*true for arrays and structs*/ >
 class ConstantUniqueMap : public AbstractTypeUser {
 public:
@@ -656,7 +656,7 @@ private:
     }
   }
 
-  ConstantClass* Create(const TypeClass *Ty, const ValType &V,
+  ConstantClass* Create(const TypeClass *Ty, ValRefType V,
                         typename MapTy::iterator I) {
     ConstantClass* Result =
       ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
@@ -675,7 +675,7 @@ public:
     
   /// getOrCreate - Return the specified constant from the map, creating it if
   /// necessary.
-  ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
+  ConstantClass *getOrCreate(const TypeClass *Ty, ValRefType V) {
     MapKey Lookup(Ty, V);
     ConstantClass* Result = 0;
     
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 92f9440..bdd988e 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -612,9 +612,10 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
 LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, 
                                       LLVMValueRef *ConstantVals,
                                       unsigned Count, LLVMBool Packed) {
-  return wrap(ConstantStruct::get(*unwrap(C),
-                                  unwrap<Constant>(ConstantVals, Count),
-                                  Count, Packed != 0));
+  Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+  return wrap(ConstantStruct::getAnon(*unwrap(C),
+                                      ArrayRef<Constant*>(Elements, Count),
+                                      Packed != 0));
 }
 
 LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
@@ -624,9 +625,8 @@ LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
 }
 LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
                             LLVMValueRef *ConstantVals, unsigned Length) {
-  return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length),
-                                 unwrap<Constant>(ConstantVals, Length),
-                                 Length));
+  ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
+  return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
 }
 LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
                              LLVMBool Packed) {
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 0ae0bdb..b8fa60a 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -417,7 +417,7 @@ bool Function::hasAddressTaken(const User* *PutOffender) const {
 /// setjmp or other function that gcc recognizes as "returning twice".
 ///
 /// FIXME: Remove after <rdar://problem/8031714> is fixed.
-/// FIXME: Is the obove FIXME valid?
+/// FIXME: Is the above FIXME valid?
 bool Function::callsFunctionThatReturnsTwice() const {
   const Module *M = this->getParent();
   static const char *ReturnsTwiceFns[] = {
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 8f4eabe..0eddd5a 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -87,11 +87,8 @@ PHINode::PHINode(const PHINode &PN)
   : Instruction(PN.getType(), Instruction::PHI,
                 allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
     ReservedSpace(PN.getNumOperands()) {
-  Use *OL = OperandList;
-  for (unsigned i = 0, e = PN.getNumOperands(); i != e; i+=2) {
-    OL[i] = PN.getOperand(i);
-    OL[i+1] = PN.getOperand(i+1);
-  }
+  std::copy(PN.op_begin(), PN.op_end(), op_begin());
+  std::copy(PN.block_begin(), PN.block_end(), block_begin());
   SubclassOptionalData = PN.SubclassOptionalData;
 }
 
@@ -99,31 +96,37 @@ PHINode::~PHINode() {
   dropHungoffUses();
 }
 
+Use *PHINode::allocHungoffUses(unsigned N) const {
+  // Allocate the array of Uses of the incoming values, followed by a pointer
+  // (with bottom bit set) to the User, followed by the array of pointers to
+  // the incoming basic blocks.
+  size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
+    + N * sizeof(BasicBlock*);
+  Use *Begin = static_cast<Use*>(::operator new(size));
+  Use *End = Begin + N;
+  (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
+  return Use::initTags(Begin, End);
+}
+
 // removeIncomingValue - Remove an incoming value.  This is useful if a
 // predecessor basic block is deleted.
 Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
-  unsigned NumOps = getNumOperands();
-  Use *OL = OperandList;
-  assert(Idx*2 < NumOps && "BB not in PHI node!");
-  Value *Removed = OL[Idx*2];
+  Value *Removed = getIncomingValue(Idx);
 
   // Move everything after this operand down.
   //
   // FIXME: we could just swap with the end of the list, then erase.  However,
-  // client might not expect this to happen.  The code as it is thrashes the
+  // clients might not expect this to happen.  The code as it is thrashes the
   // use/def lists, which is kinda lame.
-  for (unsigned i = (Idx+1)*2; i != NumOps; i += 2) {
-    OL[i-2] = OL[i];
-    OL[i-2+1] = OL[i+1];
-  }
+  std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
+  std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
 
   // Nuke the last value.
-  OL[NumOps-2].set(0);
-  OL[NumOps-2+1].set(0);
-  NumOperands = NumOps-2;
+  Op<-1>().set(0);
+  --NumOperands;
 
   // If the PHI node is dead, because it has zero entries, nuke it now.
-  if (NumOps == 2 && DeletePHIIfEmpty) {
+  if (getNumOperands() == 0 && DeletePHIIfEmpty) {
     // If anyone is using this PHI, make them use a dummy value instead...
     replaceAllUsesWith(UndefValue::get(getType()));
     eraseFromParent();
@@ -137,15 +140,18 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
 ///
 void PHINode::growOperands() {
   unsigned e = getNumOperands();
-  // Multiply by 1.5 and round down so the result is still even.
-  unsigned NumOps = e + e / 4 * 2;
-  if (NumOps < 4) NumOps = 4;      // 4 op PHI nodes are VERY common.
+  unsigned NumOps = e + e / 2;
+  if (NumOps < 2) NumOps = 2;      // 2 op PHI nodes are VERY common.
+
+  Use *OldOps = op_begin();
+  BasicBlock **OldBlocks = block_begin();
 
   ReservedSpace = NumOps;
-  Use *OldOps = OperandList;
-  Use *NewOps = allocHungoffUses(NumOps);
-  std::copy(OldOps, OldOps + e, NewOps);
-  OperandList = NewOps;
+  OperandList = allocHungoffUses(ReservedSpace);
+
+  std::copy(OldOps, OldOps + e, op_begin());
+  std::copy(OldBlocks, OldBlocks + e, block_begin());
+
   Use::zap(OldOps, OldOps + e, true);
 }
 
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 6ea4b48e..d8808b0 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -22,10 +22,10 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Metadata.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -139,27 +139,30 @@ public:
   // on Context destruction.
   SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
   
-  ConstantUniqueMap<char, Type, ConstantAggregateZero> AggZeroConstants;
+  ConstantUniqueMap<char, char, Type, ConstantAggregateZero> AggZeroConstants;
 
-  typedef ConstantUniqueMap<std::vector<Constant*>, ArrayType,
-    ConstantArray, true /*largekey*/> ArrayConstantsTy;
+  typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+    ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy;
   ArrayConstantsTy ArrayConstants;
   
-  typedef ConstantUniqueMap<std::vector<Constant*>, StructType,
-    ConstantStruct, true /*largekey*/> StructConstantsTy;
+  typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+    StructType, ConstantStruct, true /*largekey*/> StructConstantsTy;
   StructConstantsTy StructConstants;
   
-  typedef ConstantUniqueMap<std::vector<Constant*>, VectorType,
-                            ConstantVector> VectorConstantsTy;
+  typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+                            VectorType, ConstantVector> VectorConstantsTy;
   VectorConstantsTy VectorConstants;
   
-  ConstantUniqueMap<char, PointerType, ConstantPointerNull> NullPtrConstants;
-  ConstantUniqueMap<char, Type, UndefValue> UndefValueConstants;
+  ConstantUniqueMap<char, char, PointerType, ConstantPointerNull>
+    NullPtrConstants;
+  ConstantUniqueMap<char, char, Type, UndefValue> UndefValueConstants;
   
   DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
-  ConstantUniqueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants;
+  ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
+    ExprConstants;
 
-  ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm> InlineAsms;
+  ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
+                    InlineAsm> InlineAsms;
   
   ConstantInt *TheTrueVal;
   ConstantInt *TheFalseVal;
@@ -182,13 +185,6 @@ public:
   const IntegerType Int32Ty;
   const IntegerType Int64Ty;
 
-  // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
-  // for types as they are needed.  Because resolution of types must invalidate
-  // all of the abstract type descriptions, we keep them in a separate map to
-  // make this easy.
-  TypePrinting ConcreteTypeDescriptions;
-  TypePrinting AbstractTypeDescriptions;
-  
   TypeMap<ArrayValType, ArrayType> ArrayTypes;
   TypeMap<VectorValType, VectorType> VectorTypes;
   TypeMap<PointerValType, PointerType> PointerTypes;
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 566bb28..9299070 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -286,44 +286,41 @@ void Type::typeBecameConcrete(const DerivedType *AbsTy) {
   llvm_unreachable("DerivedType is already a concrete type!");
 }
 
-
-std::string Type::getDescription() const {
-  LLVMContextImpl *pImpl = getContext().pImpl;
-  TypePrinting &Map =
-    isAbstract() ?
-      pImpl->AbstractTypeDescriptions :
-      pImpl->ConcreteTypeDescriptions;
-  
-  std::string DescStr;
-  raw_string_ostream DescOS(DescStr);
-  Map.print(this, DescOS);
-  return DescOS.str();
-}
-
-
-bool StructType::indexValid(const Value *V) const {
-  // Structure indexes require 32-bit integer constants.
-  if (V->getType()->isIntegerTy(32))
-    if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
-      return indexValid(CU->getZExtValue());
-  return false;
-}
-
-bool StructType::indexValid(unsigned V) const {
-  return V < NumContainedTys;
+const Type *CompositeType::getTypeAtIndex(const Value *V) const {
+  if (const StructType *STy = dyn_cast<StructType>(this)) {
+    unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
+    assert(indexValid(Idx) && "Invalid structure index!");
+    return STy->getElementType(Idx);
+  }
+    
+  return cast<SequentialType>(this)->getElementType();
 }
-
-// getTypeAtIndex - Given an index value into the type, return the type of the
-// element.  For a structure type, this must be a constant value...
-//
-const Type *StructType::getTypeAtIndex(const Value *V) const {
-  unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
-  return getTypeAtIndex(Idx);
+const Type *CompositeType::getTypeAtIndex(unsigned Idx) const {
+  if (const StructType *STy = dyn_cast<StructType>(this)) {
+    assert(indexValid(Idx) && "Invalid structure index!");
+    return STy->getElementType(Idx);
+  }
+  
+  return cast<SequentialType>(this)->getElementType();
+}
+bool CompositeType::indexValid(const Value *V) const {
+  if (const StructType *STy = dyn_cast<StructType>(this)) {
+    // Structure indexes require 32-bit integer constants.
+    if (V->getType()->isIntegerTy(32))
+      if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
+        return CU->getZExtValue() < STy->getNumElements();
+    return false;
+  }
+  
+  // Sequential types can be indexed by any integer.
+  return V->getType()->isIntegerTy();
 }
 
-const Type *StructType::getTypeAtIndex(unsigned Idx) const {
-  assert(indexValid(Idx) && "Invalid structure index!");
-  return ContainedTys[Idx];
+bool CompositeType::indexValid(unsigned Idx) const {
+  if (const StructType *STy = dyn_cast<StructType>(this))
+    return Idx < STy->getNumElements();
+  // Sequential types can be indexed by any integer.
+  return true;
 }
 
 
@@ -942,15 +939,17 @@ StructType *StructType::get(LLVMContext &Context,
   return ST;
 }
 
-StructType *StructType::get(LLVMContext &Context, const Type *type, ...) {
+StructType *StructType::get(const Type *type, ...) {
+  assert(type != 0 && "Cannot create a struct type with no elements with this");
+  LLVMContext &Ctx = type->getContext();
   va_list ap;
-  std::vector<const llvm::Type*> StructFields;
+  SmallVector<const llvm::Type*, 8> StructFields;
   va_start(ap, type);
   while (type) {
     StructFields.push_back(type);
     type = va_arg(ap, llvm::Type*);
   }
-  return llvm::StructType::get(Context, StructFields);
+  return llvm::StructType::get(Ctx, StructFields);
 }
 
 bool StructType::isValidElementType(const Type *ElemTy) {
@@ -1067,11 +1066,6 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   assert(this != NewType && "Can't refine to myself!");
   assert(ForwardType == 0 && "This type has already been refined!");
 
-  LLVMContextImpl *pImpl = getContext().pImpl;
-
-  // The descriptions may be out of date.  Conservatively clear them all!
-  pImpl->AbstractTypeDescriptions.clear();
-
 #ifdef DEBUG_MERGE_TYPES
   DEBUG(dbgs() << "REFINING abstract type [" << (void*)this << " "
                << *this << "] to [" << (void*)NewType << " "
@@ -1221,12 +1215,6 @@ void PointerType::typeBecameConcrete(const DerivedType *AbsTy) {
   pImpl->PointerTypes.TypeBecameConcrete(this, AbsTy);
 }
 
-bool SequentialType::indexValid(const Value *V) const {
-  if (V->getType()->isIntegerTy()) 
-    return true;
-  return false;
-}
-
 namespace llvm {
 raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
   T.print(OS);
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index d68a44b..80c6a74 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -59,7 +59,7 @@ Type* TypeSymbolTable::remove(iterator Entry) {
 
 #if DEBUG_SYMBOL_TABLE
   dump();
-  dbgs() << " Removing Value: " << Result->getDescription() << "\n";
+  dbgs() << " Removing Value: " << *Result << "\n";
 #endif
 
   tmap.erase(Entry);
@@ -69,8 +69,7 @@ Type* TypeSymbolTable::remove(iterator Entry) {
   if (Result->isAbstract()) {
 #if DEBUG_ABSTYPE
     dbgs() << "Removing abstract type from symtab"
-           << Result->getDescription()
-           << "\n";
+           << *Result << "\n";
 #endif
     cast<DerivedType>(Result)->removeAbstractTypeUser(this);
   }
@@ -88,7 +87,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) {
     
 #if DEBUG_SYMBOL_TABLE
     dump();
-    dbgs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
+    dbgs() << " Inserted type: " << Name << ": " << *T << "\n";
 #endif
   } else {
     // If there is a name conflict...
@@ -101,7 +100,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) {
 #if DEBUG_SYMBOL_TABLE
     dump();
     dbgs() << " Inserting type: " << UniqueName << ": "
-           << T->getDescription() << "\n";
+           << *T << "\n";
 #endif
 
     // Insert the tmap entry
@@ -112,7 +111,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) {
   if (T->isAbstract()) {
     cast<DerivedType>(T)->addAbstractTypeUser(this);
 #if DEBUG_ABSTYPE
-    dbgs() << "Added abstract type to ST: " << T->getDescription() << "\n";
+    dbgs() << "Added abstract type to ST: " << *T << "\n";
 #endif
   }
 }
@@ -129,7 +128,7 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
     // FIXME when Types aren't const.
     if (I->second == const_cast<DerivedType *>(OldType)) {
 #if DEBUG_ABSTYPE
-      dbgs() << "Removing type " << OldType->getDescription() << "\n";
+      dbgs() << "Removing type " << *OldType << "\n";
 #endif
       OldType->removeAbstractTypeUser(this);
 
@@ -137,7 +136,7 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
       I->second = const_cast<Type *>(NewType);
       if (NewType->isAbstract()) {
 #if DEBUG_ABSTYPE
-        dbgs() << "Added type " << NewType->getDescription() << "\n";
+        dbgs() << "Added type " << *NewType << "\n";
 #endif
         cast<DerivedType>(NewType)->addAbstractTypeUser(this);
       }
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index 2258b8d..359a151 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -135,11 +135,9 @@ void Use::zap(Use *Start, const Use *Stop, bool del) {
 
 User *Use::getUser() const {
   const Use *End = getImpliedUser();
-  const PointerIntPair<User*, 1, unsigned>&
-    ref(static_cast<const AugmentedUse*>(End - 1)->ref);
-  User *She = ref.getPointer();
-  return ref.getInt()
-    ? She
+  const UserRef *ref = reinterpret_cast<const UserRef*>(End);
+  return ref->getInt()
+    ? ref->getPointer()
     : (User*)End;
 }
 
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index 2f4587d..f01fa34 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -40,14 +40,12 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
 //===----------------------------------------------------------------------===//
 
 Use *User::allocHungoffUses(unsigned N) const {
-  Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N
-                                                + sizeof(AugmentedUse)
-                                                - sizeof(Use)));
+  // Allocate the array of Uses, followed by a pointer (with bottom bit set) to
+  // the User.
+  size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
+  Use *Begin = static_cast<Use*>(::operator new(size));
   Use *End = Begin + N;
-  PointerIntPair<User*, 1, unsigned>&
-    ref(static_cast<AugmentedUse&>(End[-1]).ref);
-  ref.setPointer(const_cast<User*>(this));
-  ref.setInt(1);
+  (void) new(End) Use::UserRef(const_cast<User*>(this), 1);
   return Use::initTags(Begin, End);
 }
 
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 29f6a80..a03cddc 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -305,6 +305,9 @@ void Value::uncheckedReplaceAllUsesWith(Value *New) {
 
     U.set(New);
   }
+
+  if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
+    BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
 }
 
 void Value::replaceAllUsesWith(Value *New) {
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 254bf06..f1c9703 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -25,7 +25,7 @@ ValueSymbolTable::~ValueSymbolTable() {
 #ifndef NDEBUG   // Only do this in -g mode...
   for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
     dbgs() << "Value still in symbol table! Type = '"
-           << VI->getValue()->getType()->getDescription() << "' Name = '"
+           << *VI->getValue()->getType() << "' Name = '"
            << VI->getKeyData() << "'\n";
   assert(vmap.empty() && "Values remain in symbol table!");
 #endif
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index c054ae4..21a1f03 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -133,6 +133,7 @@ std::string EVT::getEVTString() const {
   case MVT::v2f64:   return "v2f64";
   case MVT::v4f64:   return "v4f64";
   case MVT::Metadata:return "Metadata";
+  case MVT::untyped: return "untyped";
   }
 }
 
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 139e035..18de671 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1139,9 +1139,6 @@ void Verifier::visitPHINode(PHINode &PN) {
   for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
     Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
             "PHI node operands are not the same type as the result!", &PN);
-    Assert1(isa<BasicBlock>(PN.getOperand(
-                PHINode::getOperandNumForIncomingBlock(i))),
-            "PHI node incoming block is not a BasicBlock!", &PN);
   }
 
   // All other PHI node constraints are checked in the visitBasicBlock method.
@@ -1482,8 +1479,10 @@ void Verifier::visitInstruction(Instruction &I) {
         // PHI nodes differ from other nodes because they actually "use" the
         // value in the predecessor basic blocks they correspond to.
         BasicBlock *UseBlock = BB;
-        if (isa<PHINode>(I))
-          UseBlock = dyn_cast<BasicBlock>(I.getOperand(i+1));
+        if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+          unsigned j = PHINode::getIncomingValueNumForOperand(i);
+          UseBlock = PN->getIncomingBlock(j);
+        }
         Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
                 Op, &I);
 
@@ -1515,10 +1514,11 @@ void Verifier::visitInstruction(Instruction &I) {
                 return;
               }
         }
-      } else if (isa<PHINode>(I)) {
+      } else if (PHINode *PN = dyn_cast<PHINode>(&I)) {
         // PHI nodes are more difficult than other nodes because they actually
         // "use" the value in the predecessor basic blocks they correspond to.
-        BasicBlock *PredBB = dyn_cast<BasicBlock>(I.getOperand(i+1));
+        unsigned j = PHINode::getIncomingValueNumForOperand(i);
+        BasicBlock *PredBB = PN->getIncomingBlock(j);
         Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
                            !DT->isReachableFromEntry(PredBB)),
                 "Instruction does not dominate all uses!", Op, &I);
diff --git a/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll b/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
deleted file mode 100644
index 50fb222..0000000
--- a/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -basicaa -licm
-
-%"java/lang/Object" = type { %struct.llvm_java_object_base }
-%"java/lang/StringBuffer" = type { "java/lang/Object", i32, { "java/lang/Object", i32, [0 x i8] }*, i1 }
-%struct.llvm_java_object_base = type opaque
-
-define void @"java/lang/StringBuffer/setLength(I)V"(%struct.llvm_java_object_base*) {
-bc0:
-	br i1 false, label %bc40, label %bc65
-
-bc65:		; preds = %bc0, %bc40
-	ret void
-
-bc40:		; preds = %bc0, %bc40
-	%tmp75 = bitcast %struct.llvm_java_object_base* %0 to %"java/lang/StringBuffer"*		; <"java/lang/StringBuffer"*> [#uses=1]
-	%tmp76 = getelementptr %"java/lang/StringBuffer"* %tmp75, i32 0, i32 1		; <i32*> [#uses=1]
-	store i32 0, i32* %tmp76
-	%tmp381 = bitcast %struct.llvm_java_object_base* %0 to %"java/lang/StringBuffer"*		; <"java/lang/StringBuffer"*> [#uses=1]
-	%tmp392 = getelementptr %"java/lang/StringBuffer"* %tmp381, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp403 = load i32* %tmp392		; <i32> [#uses=0]
-	br i1 false, label %bc40, label %bc65
-}
diff --git a/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll b/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
deleted file mode 100644
index cc84314..0000000
--- a/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -basicaa -dse
-
-%"java/lang/Object" = type { %struct.llvm_java_object_base }
-%"java/lang/StringBuffer" = type { "java/lang/Object", i32, { "java/lang/Object", i32, [0 x i8] }*, i1 }
-%struct.llvm_java_object_base = type opaque
-
-define void @"java/lang/StringBuffer/ensureCapacity_unsynchronized(I)V"() {
-bc0:
-	%tmp = getelementptr %"java/lang/StringBuffer"* null, i32 0, i32 3		; <i1*> [#uses=1]
-	br i1 false, label %bc16, label %bc7
-
-bc16:		; preds = %bc0
-	%tmp91 = getelementptr %"java/lang/StringBuffer"* null, i32 0, i32 2		; <{ "java/lang/Object", i32, [0 x i8] }**> [#uses=1]
-	store { %"java/lang/Object", i32, [0 x i8] }* null, { %"java/lang/Object", i32, [0 x i8] }** %tmp91
-	store i1 false, i1* %tmp
-	ret void
-
-bc7:		; preds = %bc0
-	ret void
-}
diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
index 062ea59..ebd349a 100644
--- a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
+++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
@@ -15,12 +15,12 @@ define void @test0() {
 ; CHECK: NoModRef:   call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <->   call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
 ; CHECK: NoModRef:   call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <->   call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
 
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 @A = external global i8
 @B = external global i8
 define void @test1() {
-  call void @llvm.memset.i64(i8* @A, i8 0, i64 1, i32 1)
-  call void @llvm.memset.i64(i8* @B, i8 0, i64 1, i32 1)
+  call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
   ret void
 }
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index 7a71e3e..233396b 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -1,10 +1,6 @@
 ; RUN: opt < %s -basicaa -gvn -dse -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
-declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
 
 declare void @external(i32*) 
@@ -15,7 +11,7 @@ define i32 @test0(i8* %P) {
   
   store i32 0, i32* %A
   
-  call void @llvm.memset.i32(i8* %P, i8 0, i32 42, i32 1)
+  call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false)
   
   %B = load i32* %A
   ret i32 %B
@@ -24,8 +20,6 @@ define i32 @test0(i8* %P) {
 ; CHECK: ret i32 0
 }
 
-declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
-
 define i8 @test1() {
 ; CHECK: @test1
   %A = alloca i8
@@ -33,7 +27,7 @@ define i8 @test1() {
 
   store i8 2, i8* %B  ;; Not written to by memcpy
 
-  call void @llvm.memcpy.i8(i8* %A, i8* %B, i8 -1, i32 0)
+  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
   %C = load i8* %B
   ret i8 %C
@@ -44,7 +38,7 @@ define i8 @test2(i8* %P) {
 ; CHECK: @test2
   %P2 = getelementptr i8* %P, i32 127
   store i8 1, i8* %P2  ;; Not dead across memset
-  call void @llvm.memset.i8(i8* %P, i8 2, i8 127, i32 0)
+  call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
   %A = load i8* %P2
   ret i8 %A
 ; CHECK: ret i8 1
@@ -57,7 +51,7 @@ define i8 @test2a(i8* %P) {
   ;; FIXME: DSE isn't zapping this dead store.
   store i8 1, i8* %P2  ;; Dead, clobbered by memset.
   
-  call void @llvm.memset.i8(i8* %P, i8 2, i8 127, i32 0)
+  call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
   %A = load i8* %P2
   ret i8 %A
 ; CHECK-NOT: load
@@ -97,7 +91,7 @@ define void @test3a(i8* %P, i8 %X) {
 
 define i32 @test4(i8* %P) {
   %tmp = load i32* @G1
-  call void @llvm.memset.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1)
+  call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1, i1 false)
   %tmp2 = load i32* @G1
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
@@ -112,7 +106,7 @@ define i32 @test4(i8* %P) {
 ; write to G1.
 define i32 @test5(i8* %P, i32 %Len) {
   %tmp = load i32* @G1
-  call void @llvm.memcpy.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1, i1 false)
   %tmp2 = load i32* @G1
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
@@ -134,3 +128,9 @@ define i8 @test6(i8* %p, i8* noalias %a) {
 ; CHECK-NOT: load
 ; CHECK: ret
 }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
diff --git a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
index 9573aed..ec95141 100644
--- a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
+++ b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
@@ -7,7 +7,7 @@
 
 define void @test(i32 %N) {
 entry:
-        "alloca point" = bitcast i32 0 to i32           ; <i32> [#uses=0]
+        %"alloca point" = bitcast i32 0 to i32           ; <i32> [#uses=0]
         br label %bb3
 
 bb:             ; preds = %bb3
diff --git a/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll
index 31b95e1..f61b667 100644
--- a/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll
@@ -7,32 +7,32 @@ target triple = "x86_64-unknown-freebsd8.0"
 module asm ".ident\09\22$FreeBSD: head/sys/kern/vfs_subr.c 195285 2009-07-02 14:19:33Z jamie $\22"
 module asm ".section set_pcpu, \22aw\22, @progbits"
 module asm ".previous"
-	type <{ [40 x i8] }>		; type %0
-	type <{ %struct.vm_object*, %struct.vm_object** }>		; type %1
-	type <{ %struct.vm_object* }>		; type %2
-	type <{ %struct.vm_page*, %struct.vm_page** }>		; type %3
-	type <{ %struct.pv_entry*, %struct.pv_entry** }>		; type %4
-	type <{ %struct.vm_reserv* }>		; type %5
-	type <{ %struct.bufobj*, %struct.bufobj** }>		; type %6
-	type <{ %struct.proc*, %struct.proc** }>		; type %7
-	type <{ %struct.thread*, %struct.thread** }>		; type %8
-	type <{ %struct.prison*, %struct.prison** }>		; type %9
-	type <{ %struct.prison* }>		; type %10
-	type <{ %struct.task* }>		; type %11
-	type <{ %struct.osd*, %struct.osd** }>		; type %12
-	type <{ %struct.proc* }>		; type %13
-	type <{ %struct.ksiginfo*, %struct.ksiginfo** }>		; type %14
-	type <{ %struct.pv_chunk*, %struct.pv_chunk** }>		; type %15
-	type <{ %struct.pgrp*, %struct.pgrp** }>		; type %16
-	type <{ %struct.knote*, %struct.knote** }>		; type %17
-	type <{ %struct.ktr_request*, %struct.ktr_request** }>		; type %18
-	type <{ %struct.mqueue_notifier* }>		; type %19
-	type <{ %struct.turnstile* }>		; type %20
-	type <{ %struct.namecache* }>		; type %21
-	type <{ %struct.namecache*, %struct.namecache** }>		; type %22
-	type <{ %struct.lockf*, %struct.lockf** }>		; type %23
-	type <{ %struct.lockf_entry*, %struct.lockf_entry** }>		; type %24
-	type <{ %struct.lockf_edge*, %struct.lockf_edge** }>		; type %25
+	%0 = type <{ [40 x i8] }>		; type %0
+	%1 = type <{ %struct.vm_object*, %struct.vm_object** }>		; type %1
+	%2 = type <{ %struct.vm_object* }>		; type %2
+	%3 = type <{ %struct.vm_page*, %struct.vm_page** }>		; type %3
+	%4 = type <{ %struct.pv_entry*, %struct.pv_entry** }>		; type %4
+	%5 = type <{ %struct.vm_reserv* }>		; type %5
+	%6 = type <{ %struct.bufobj*, %struct.bufobj** }>		; type %6
+	%7 = type <{ %struct.proc*, %struct.proc** }>		; type %7
+	%8 = type <{ %struct.thread*, %struct.thread** }>		; type %8
+	%9 = type <{ %struct.prison*, %struct.prison** }>		; type %9
+	%10 = type <{ %struct.prison* }>		; type %10
+	%11 = type <{ %struct.task* }>		; type %11
+	%12 = type <{ %struct.osd*, %struct.osd** }>		; type %12
+	%13 = type <{ %struct.proc* }>		; type %13
+	%14 = type <{ %struct.ksiginfo*, %struct.ksiginfo** }>		; type %14
+	%15 = type <{ %struct.pv_chunk*, %struct.pv_chunk** }>		; type %15
+	%16 = type <{ %struct.pgrp*, %struct.pgrp** }>		; type %16
+	%17 = type <{ %struct.knote*, %struct.knote** }>		; type %17
+	%18 = type <{ %struct.ktr_request*, %struct.ktr_request** }>		; type %18
+	%19 = type <{ %struct.mqueue_notifier* }>		; type %19
+	%20 = type <{ %struct.turnstile* }>		; type %20
+	%21 = type <{ %struct.namecache* }>		; type %21
+	%22 = type <{ %struct.namecache*, %struct.namecache** }>		; type %22
+	%23 = type <{ %struct.lockf*, %struct.lockf** }>		; type %23
+	%24 = type <{ %struct.lockf_entry*, %struct.lockf_entry** }>		; type %24
+	%25 = type <{ %struct.lockf_edge*, %struct.lockf_edge** }>		; type %25
 	%struct.__siginfo = type <{ i32, i32, i32, i32, i32, i32, i8*, %union.sigval, %0 }>
 	%struct.__sigset = type <{ [4 x i32] }>
 	%struct.acl = type <{ i32, i32, [4 x i32], [254 x %struct.acl_entry] }>
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
index 89e8b98..474d564 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
@@ -173,7 +173,7 @@ bb23:		; preds = %bb24, %bb.nph
 	%55 = mul i32 %y.21, %w		; <i32> [#uses=1]
 	%.sum5 = add i32 %55, %.sum3		; <i32> [#uses=1]
 	%56 = getelementptr i8* %j, i32 %.sum5		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %56, i8* %54, i32 %w, i32 1)
+	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i32 1, i1 false)
 	%57 = add i32 %y.21, 1		; <i32> [#uses=2]
 	br label %bb24
 
@@ -190,7 +190,7 @@ bb26:		; preds = %bb24.bb26_crit_edge, %bb22
 	%60 = getelementptr i8* %j, i32 %.sum4		; <i8*> [#uses=1]
 	%61 = mul i32 %x, %w		; <i32> [#uses=1]
 	%62 = sdiv i32 %61, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %60, i8 -128, i32 %62, i32 1)
+	tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i32 1, i1 false)
 	ret void
 
 bb29:		; preds = %bb20, %entry
@@ -208,7 +208,7 @@ bb30:		; preds = %bb31, %bb.nph11
 	%67 = getelementptr i8* %r, i32 %66		; <i8*> [#uses=1]
 	%68 = mul i32 %y.310, %w		; <i32> [#uses=1]
 	%69 = getelementptr i8* %j, i32 %68		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %69, i8* %67, i32 %w, i32 1)
+	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i32 1, i1 false)
 	%70 = add i32 %y.310, 1		; <i32> [#uses=2]
 	br label %bb31
 
@@ -224,13 +224,12 @@ bb33:		; preds = %bb31.bb33_crit_edge, %bb29
 	%73 = getelementptr i8* %j, i32 %72		; <i8*> [#uses=1]
 	%74 = mul i32 %x, %w		; <i32> [#uses=1]
 	%75 = sdiv i32 %74, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %73, i8 -128, i32 %75, i32 1)
+	tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i32 1, i1 false)
 	ret void
 
 return:		; preds = %bb20
 	ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Analysis/ScalarEvolution/pr3909.ll b/test/Analysis/ScalarEvolution/pr3909.ll
index 10e328d..cf7531d 100644
--- a/test/Analysis/ScalarEvolution/pr3909.ll
+++ b/test/Analysis/ScalarEvolution/pr3909.ll
@@ -2,8 +2,8 @@
 ; PR 3909
 
 
-	type { i32, %1* }		; type %0
-	type { i32, i8* }		; type %1
+	%0 = type { i32, %1* }		; type %0
+	%1 = type { i32, i8* }		; type %1
 
 define x86_stdcallcc i32 @_Dmain(%0 %unnamed) {
 entry:
diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll
index d750d4a..cb4e267 100644
--- a/test/Analysis/ScalarEvolution/trip-count.ll
+++ b/test/Analysis/ScalarEvolution/trip-count.ll
@@ -7,7 +7,6 @@
 
 define void @test(i32 %N) {
 entry:
-        "alloca point" = bitcast i32 0 to i32           ; <i32> [#uses=0]
         br label %bb3
 
 bb:             ; preds = %bb3
diff --git a/test/Analysis/ScalarEvolution/trip-count2.ll b/test/Analysis/ScalarEvolution/trip-count2.ll
index 79f3161..e26cbea 100644
--- a/test/Analysis/ScalarEvolution/trip-count2.ll
+++ b/test/Analysis/ScalarEvolution/trip-count2.ll
@@ -7,7 +7,6 @@
 
 define void @test(i32 %N) {
 entry:
-        "alloca point" = bitcast i32 0 to i32           ; <i32> [#uses=0]
         br label %bb3
 
 bb:             ; preds = %bb3
diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
index 10b798b..1bf86ae 100644
--- a/test/Analysis/ScalarEvolution/trip-count3.ll
+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
@@ -5,15 +5,14 @@
 ; dividing by the stride will have a remainder. This could theoretically
 ; be teaching it how to use a more elaborate trip count computation.
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
-	%struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
-@_2E_str = external constant [26 x i8]		; <[26 x i8]*> [#uses=0]
-@stdin = external global %struct.FILE*		; <%struct.FILE**> [#uses=0]
-@_2E_str1 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
-@_2E_str12 = external constant [30 x i8]		; <[30 x i8]*> [#uses=0]
+%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+
+@_2E_str = external constant [26 x i8]
+@stdin = external global %struct.FILE*
+@_2E_str1 = external constant [3 x i8]
+@_2E_str12 = external constant [30 x i8]
 
 declare void @sha_init(%struct.SHA_INFO* nocapture) nounwind
 
@@ -25,12 +24,8 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 declare void @sha_final(%struct.SHA_INFO* nocapture) nounwind
 
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
 declare void @sha_update(%struct.SHA_INFO* nocapture, i8* nocapture, i32) nounwind
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
 declare i64 @fread(i8* noalias nocapture, i64, i64, %struct.FILE* noalias nocapture) nounwind
 
 declare i32 @main(i32, i8** nocapture) nounwind
@@ -43,36 +38,41 @@ declare void @sha_stream(%struct.SHA_INFO* nocapture, %struct.FILE* nocapture) n
 
 define void @sha_stream_bb3_2E_i(%struct.SHA_INFO* %sha_info, i8* %data1, i32, i8** %buffer_addr.0.i.out, i32* %count_addr.0.i.out) nounwind {
 newFuncRoot:
-	br label %bb3.i
-
-sha_update.exit.exitStub:		; preds = %bb3.i
-	store i8* %buffer_addr.0.i, i8** %buffer_addr.0.i.out
-	store i32 %count_addr.0.i, i32* %count_addr.0.i.out
-	ret void
-
-bb2.i:		; preds = %bb3.i
-	%1 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3		; <[16 x i32]*> [#uses=1]
-	%2 = bitcast [16 x i32]* %1 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1) nounwind
-	%3 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0		; <i32*> [#uses=1]
-	%4 = bitcast i32* %3 to i8*		; <i8*> [#uses=1]
-	br label %codeRepl
-
-codeRepl:		; preds = %bb2.i
-	call void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8* %4)
-	br label %byte_reverse.exit.i
-
-byte_reverse.exit.i:		; preds = %codeRepl
-	call fastcc void @sha_transform(%struct.SHA_INFO* %sha_info) nounwind
-	%5 = getelementptr i8* %buffer_addr.0.i, i64 64		; <i8*> [#uses=1]
-	%6 = add i32 %count_addr.0.i, -64		; <i32> [#uses=1]
-	br label %bb3.i
-
-bb3.i:		; preds = %byte_reverse.exit.i, %newFuncRoot
-	%buffer_addr.0.i = phi i8* [ %data1, %newFuncRoot ], [ %5, %byte_reverse.exit.i ]		; <i8*> [#uses=3]
-	%count_addr.0.i = phi i32 [ %0, %newFuncRoot ], [ %6, %byte_reverse.exit.i ]		; <i32> [#uses=3]
-	%7 = icmp sgt i32 %count_addr.0.i, 63		; <i1> [#uses=1]
-	br i1 %7, label %bb2.i, label %sha_update.exit.exitStub
+  br label %bb3.i
+
+sha_update.exit.exitStub:                         ; preds = %bb3.i
+  store i8* %buffer_addr.0.i, i8** %buffer_addr.0.i.out
+  store i32 %count_addr.0.i, i32* %count_addr.0.i.out
+  ret void
+
+bb2.i:                                            ; preds = %bb3.i
+  %1 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3
+  %2 = bitcast [16 x i32]* %1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1, i1 false)
+  %3 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0
+  %4 = bitcast i32* %3 to i8*
+  br label %codeRepl
+
+codeRepl:                                         ; preds = %bb2.i
+  call void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8* %4)
+  br label %byte_reverse.exit.i
+
+byte_reverse.exit.i:                              ; preds = %codeRepl
+  call fastcc void @sha_transform(%struct.SHA_INFO* %sha_info) nounwind
+  %5 = getelementptr i8* %buffer_addr.0.i, i64 64
+  %6 = add i32 %count_addr.0.i, -64
+  br label %bb3.i
+
+bb3.i:                                            ; preds = %byte_reverse.exit.i, %newFuncRoot
+  %buffer_addr.0.i = phi i8* [ %data1, %newFuncRoot ], [ %5, %byte_reverse.exit.i ]
+  %count_addr.0.i = phi i32 [ %0, %newFuncRoot ], [ %6, %byte_reverse.exit.i ]
+  %7 = icmp sgt i32 %count_addr.0.i, 63
+  br i1 %7, label %bb2.i, label %sha_update.exit.exitStub
 }
 
 declare void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8*) nounwind
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Assembler/2002-04-04-PureVirtMethCall.ll b/test/Assembler/2002-04-04-PureVirtMethCall.ll
deleted file mode 100644
index 29aed55..0000000
--- a/test/Assembler/2002-04-04-PureVirtMethCall.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-  type { { \2 *, \4 ** },
-         { \2 *, \4 ** }
-       }
-
diff --git a/test/Assembler/2002-04-04-PureVirtMethCall2.ll b/test/Assembler/2002-04-04-PureVirtMethCall2.ll
deleted file mode 100644
index a096899..0000000
--- a/test/Assembler/2002-04-04-PureVirtMethCall2.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-%t = type { { \2*, \2 },
-            { \2*, \2 }
-          }
diff --git a/test/Assembler/2002-07-14-InternalLossage.ll b/test/Assembler/2002-07-14-InternalLossage.ll
deleted file mode 100644
index f93f1c4..0000000
--- a/test/Assembler/2002-07-14-InternalLossage.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Test to make sure that the 'internal' tag is not lost!
-;
-; RUN: llvm-as < %s | llvm-dis | grep internal
-
-declare void @foo()
-
-define internal void @foo() {
-        ret void
-}
diff --git a/test/Assembler/2002-10-15-NameClash.ll b/test/Assembler/2002-10-15-NameClash.ll
deleted file mode 100644
index 89346cb..0000000
--- a/test/Assembler/2002-10-15-NameClash.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-declare i32 @"ArrayRef"([100 x i32] * %Array)
-
-define i32 @"ArrayRef"([100 x i32] * %Array) {
-	ret i32 0
-}
diff --git a/test/Assembler/2008-02-20-MultipleReturnValue.ll b/test/Assembler/2008-02-20-MultipleReturnValue.ll
deleted file mode 100644
index 32c893a..0000000
--- a/test/Assembler/2008-02-20-MultipleReturnValue.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -verify -S | llvm-as -disable-output
-
-define {i32, i8} @foo(i32 %p) {
-  ret i32 1, i8 2
-}
-
-define i8 @f2(i32 %p) {
-   %c = call {i32, i8} @foo(i32 %p)
-   %d = getresult {i32, i8} %c, 1
-   %e = add i8 %d, 1
-   ret i8 %e
-}
-
-define i32 @f3(i32 %p) {
-   %c = invoke {i32, i8} @foo(i32 %p)
-         to label %L unwind label %L2
-   L: 
-   %d = getresult {i32, i8} %c, 0
-   ret i32 %d
-   L2:
-   ret i32 0
-}
diff --git a/test/Assembler/2009-02-28-CastOpc.ll b/test/Assembler/2009-02-28-CastOpc.ll
index ee98d41..6035643 100644
--- a/test/Assembler/2009-02-28-CastOpc.ll
+++ b/test/Assembler/2009-02-28-CastOpc.ll
@@ -1,8 +1,7 @@
 ; RUN: llvm-as < %s | llvm-dis
 
-type i32
 
 define void @foo() {
-  bitcast %0* null to i32*
+  bitcast i32* null to i32*
   ret void
 }
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index 20beb49..eb4ac76 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -1,87 +1,6 @@
 ; Tests to make sure intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis | not grep {i32 @llvm\\.ct}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {llvm\\.part\\.set\\.i\[0-9\]*\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {llvm\\.x86\\.sse2\\.loadu}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 
-declare i32 @llvm.ctpop.i28(i28 %val)
-declare i32 @llvm.cttz.i29(i29 %val)
-declare i32 @llvm.ctlz.i30(i30 %val)
-
-define i32 @test_ct(i32 %A) {
-  %c1 = call i32 @llvm.ctpop.i28(i28 1234)
-  %c2 = call i32 @llvm.cttz.i29(i29 2345)
-  %c3 = call i32 @llvm.ctlz.i30(i30 3456)
-  %r1 = add i32 %c1, %c2
-  %r2 = add i32 %r1, %c3
-  ret i32 %r2
-}
-
-declare i32 @llvm.part.set.i32.i32.i32(i32 %x, i32 %rep, i32 %hi, i32 %lo)
-declare i16 @llvm.part.set.i16.i16.i16(i16 %x, i16 %rep, i32 %hi, i32 %lo)
-define i32 @test_part_set(i32 %A, i16 %B) {
-  %a = call i32 @llvm.part.set.i32.i32.i32(i32 %A, i32 27, i32 8, i32 0)
-  %b = call i16 @llvm.part.set.i16.i16.i16(i16 %B, i16 27, i32 8, i32 0)
-  %c = zext i16 %b to i32
-  %d = add i32 %a, %c
-  ret i32 %d
-}
-
-declare i32 @llvm.part.select.i32.i32(i32 %x, i32 %hi, i32 %lo)
-declare i16 @llvm.part.select.i16.i16(i16 %x, i32 %hi, i32 %lo)
-define i32 @test_part_select(i32 %A, i16 %B) {
-  %a = call i32 @llvm.part.select.i32.i32(i32 %A, i32 8, i32 0)
-  %b = call i16 @llvm.part.select.i16.i16(i16 %B, i32 8, i32 0)
-  %c = zext i16 %b to i32
-  %d = add i32 %a, %c
-  ret i32 %d
-}
-
-declare i32 @llvm.bswap.i32.i32(i32 %x)
-declare i16 @llvm.bswap.i16.i16(i16 %x)
-define i32 @test_bswap(i32 %A, i16 %B) {
-  %a = call i32 @llvm.bswap.i32.i32(i32 %A)
-  %b = call i16 @llvm.bswap.i16.i16(i16 %B)
-  %c = zext i16 %b to i32
-  %d = add i32 %a, %c
-  ret i32 %d
-}
-
-declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <2 x i32>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <2 x i32>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <2 x i32>) nounwind readnone 
-define void @sh16(<4 x i16> %A, <2 x i32> %B) {
-	%r1 = call <4 x i16> @llvm.x86.mmx.psra.w( <4 x i16> %A, <2 x i32> %B )		; <<4 x i16>> [#uses=0]
-	%r2 = call <4 x i16> @llvm.x86.mmx.psll.w( <4 x i16> %A, <2 x i32> %B )		; <<4 x i16>> [#uses=0]
-	%r3 = call <4 x i16> @llvm.x86.mmx.psrl.w( <4 x i16> %A, <2 x i32> %B )		; <<4 x i16>> [#uses=0]
-	ret void
-}
-
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <2 x i32>) nounwind readnone 
-define void @sh32(<2 x i32> %A, <2 x i32> %B) {
-	%r1 = call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %A, <2 x i32> %B )		; <<2 x i32>> [#uses=0]
-	%r2 = call <2 x i32> @llvm.x86.mmx.psll.d( <2 x i32> %A, <2 x i32> %B )		; <<2 x i32>> [#uses=0]
-	%r3 = call <2 x i32> @llvm.x86.mmx.psrl.d( <2 x i32> %A, <2 x i32> %B )		; <<2 x i32>> [#uses=0]
-	ret void
-}
-
-declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <2 x i32>) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <2 x i32>) nounwind readnone 
-define void @sh64(<1 x i64> %A, <2 x i32> %B) {
-	%r1 = call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %A, <2 x i32> %B )		; <<1 x i64>> [#uses=0]
-	%r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B )		; <<1 x i64>> [#uses=0]
-	ret void
-}
 
 declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone
 declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone
@@ -90,6 +9,10 @@ define void @test_loadu(i8* %a, double* %b) {
   %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a)
   %v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a)
   %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
+
+; CHECK: load i128* {{.*}}, align 1
+; CHECK: load i128* {{.*}}, align 1
+; CHECK: load i128* {{.*}}, align 1
   ret void
 }
 
diff --git a/test/Assembler/AutoUpgradeMMXIntrinsics.ll b/test/Assembler/AutoUpgradeMMXIntrinsics.ll
deleted file mode 100644
index 54120ff..0000000
--- a/test/Assembler/AutoUpgradeMMXIntrinsics.ll
+++ /dev/null
@@ -1,223 +0,0 @@
-; Tests to make sure MMX intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis -o %t
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<1 x i64\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<2 x i32\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<4 x i16\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<8 x i8\\\>}
-; RUN: grep {llvm\\.x86\\.sse\\.pshuf\\.w} %t | not grep i32
-
-; Addition
-declare <8 x i8>  @llvm.x86.mmx.padd.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone
-declare <8 x i8>  @llvm.x86.mmx.padds.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i8>  @llvm.x86.mmx.paddus.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @add(<8 x i8> %A,  <8 x i8> %B,  <4 x i16> %C, <4 x i16> %D,
-                 <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.padd.b(<8 x i8> %A,  <8 x i8> %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16> %C, <4 x i16> %D)
-  %r3 = call <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32> %E, <2 x i32> %F)
-  %r4 = call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %G, <1 x i64> %H)
-  %r5 = call <8 x i8>  @llvm.x86.mmx.padds.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r6 = call <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16> %C, <4 x i16> %D)
-  %r7 = call <8 x i8>  @llvm.x86.mmx.paddus.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r8 = call <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Subtraction
-declare <8 x i8>  @llvm.x86.mmx.psub.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone
-declare <8 x i8>  @llvm.x86.mmx.psubs.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i8>  @llvm.x86.mmx.psubus.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @sub(<8 x i8> %A,  <8 x i8> %B,  <4 x i16> %C, <4 x i16> %D,
-                 <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.psub.b(<8 x i8> %A,  <8 x i8> %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16> %C, <4 x i16> %D)
-  %r3 = call <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32> %E, <2 x i32> %F)
-  %r4 = call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %G, <1 x i64> %H)
-  %r5 = call <8 x i8>  @llvm.x86.mmx.psubs.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r6 = call <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16> %C, <4 x i16> %D)
-  %r7 = call <8 x i8>  @llvm.x86.mmx.psubus.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r8 = call <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Multiplication
-declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>) nounwind readnone
-define void @mul(<4 x i16> %A, <4 x i16> %B) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16> %A, <4 x i16> %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16> %A, <4 x i16> %B)
-  %r3 = call <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16> %A, <4 x i16> %B)
-  %r4 = call <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16> %A, <4 x i16> %B)
-  %r5 = call <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16> %A, <4 x i16> %B)
-  ret void
-}
-
-; Bitwise operations
-declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>)  nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>)   nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>)  nounwind readnone
-define void @bit(<1 x i64> %A, <1 x i64> %B) {
-  %r1 = call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %A, <1 x i64> %B)
-  %r2 = call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %A, <1 x i64> %B)
-  %r3 = call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %A, <1 x i64> %B)
-  %r4 = call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %A, <1 x i64> %B)
-  ret void
-}
-
-; Averages
-declare <8 x i8>  @llvm.x86.mmx.pavg.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @avg(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.pavg.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Maximum
-declare <8 x i8>  @llvm.x86.mmx.pmaxu.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @max(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.pmaxu.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Minimum
-declare <8 x i8>  @llvm.x86.mmx.pminu.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @min(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.pminu.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Packed sum of absolute differences
-declare <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8>, <8 x i8>) nounwind readnone
-define void @psad(<8 x i8> %A, <8 x i8> %B) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8> %A, <8 x i8> %B)
-  ret void
-}
-
-; Shift left
-declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <1 x i64>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <1 x i64>) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16>, i32) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32>, i32) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone 
-define void @shl(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16> %A, <1 x i64> %C)
-  %r2 = call <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32> %B, <1 x i64> %C)
-  %r3 = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %C, <1 x i64> %C)
-  %r4 = call <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16> %A, i32 %D)
-  %r5 = call <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32> %B, i32 %D)
-  %r6 = call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %C, i32 %D)
-  ret void
-}
-
-; Shift right logical
-declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <1 x i64>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <1 x i64>) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32>, i32) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone 
-define void @shr(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16> %A, <1 x i64> %C)
-  %r2 = call <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32> %B, <1 x i64> %C)
-  %r3 = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %C, <1 x i64> %C)
-  %r4 = call <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16> %A, i32 %D)
-  %r5 = call <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32> %B, i32 %D)
-  %r6 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %C, i32 %D)
-  ret void
-}
-
-; Shift right arithmetic
-declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <1 x i64>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <1 x i64>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16>, i32) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32>, i32) nounwind readnone 
-define void @sra(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16> %A, <1 x i64> %C)
-  %r2 = call <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32> %B, <1 x i64> %C)
-  %r3 = call <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16> %A, i32 %D)
-  %r4 = call <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32> %B, i32 %D)
-  ret void
-}
-
-; Pack/Unpack ops
-declare <8 x i8>  @llvm.x86.mmx.packsswb(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <8 x i8>  @llvm.x86.mmx.packuswb(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <8 x i8>  @llvm.x86.mmx.punpckhbw(<8 x i8>, <8 x i8>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <8 x i8>  @llvm.x86.mmx.punpcklbw(<8 x i8>, <8 x i8>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32>, <2 x i32>) nounwind readnone 
-define void @pack_unpack(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
-                         <2 x i32> %E, <2 x i32> %F) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.packsswb(<4 x i16> %C, <4 x i16> %D)
-  %r2 = call <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32> %E, <2 x i32> %F)
-  %r3 = call <8 x i8>  @llvm.x86.mmx.packuswb(<4 x i16> %C, <4 x i16> %D)
-  %r4 = call <8 x i8>  @llvm.x86.mmx.punpckhbw(<8 x i8>  %A, <8 x i8>  %B)
-  %r5 = call <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16> %C, <4 x i16> %D)
-  %r6 = call <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32> %E, <2 x i32> %F)
-  %r7 = call <8 x i8>  @llvm.x86.mmx.punpcklbw(<8 x i8>  %A, <8 x i8>  %B)
-  %r8 = call <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16> %C, <4 x i16> %D)
-  %r9 = call <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32> %E, <2 x i32> %F)
-  ret void
-}
-
-; Integer comparison ops
-declare <8 x i8>  @llvm.x86.mmx.pcmpeq.b(<8 x i8>, <8 x i8>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <8 x i8>  @llvm.x86.mmx.pcmpgt.b(<8 x i8>, <8 x i8>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32>, <2 x i32>) nounwind readnone 
-define void @cmp(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
-                 <2 x i32> %E, <2 x i32> %F) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.pcmpeq.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16> %C, <4 x i16> %D)
-  %r3 = call <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32> %E, <2 x i32> %F)
-  %r4 = call <8 x i8>  @llvm.x86.mmx.pcmpgt.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r5 = call <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16> %C, <4 x i16> %D)
-  %r6 = call <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32> %E, <2 x i32> %F)
-  ret void
-}
-
-; Miscellaneous
-declare void      @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i32*) nounwind readnone 
-declare i32       @llvm.x86.mmx.pmovmskb(<8 x i8>) nounwind readnone 
-declare void      @llvm.x86.mmx.movnt.dq(i32*, <1 x i64>) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>,  i8) nounwind readnone 
-declare i32       @llvm.x86.mmx.pextr.w(<1 x i64>, i32) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32) nounwind readnone 
-declare <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16>, i32) nounwind readnone 
-define void @misc(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
-                  <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H,
-                  i32* %I, i8 %J, i16 %K, i32 %L) {
-        call void      @llvm.x86.mmx.maskmovq(<8 x i8> %A, <8 x i8> %B, i32* %I)
-  %r1 = call i32       @llvm.x86.mmx.pmovmskb(<8 x i8> %A)
-        call void      @llvm.x86.mmx.movnt.dq(i32* %I, <1 x i64> %G)
-  %r2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %G, <1 x i64> %H, i8 %J)
-  %r3 = call i32       @llvm.x86.mmx.pextr.w(<1 x i64> %G, i32 37)
-  %r4 = call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %G, i32 37, i32 927)
-  %r5 = call <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16> %C, i32 37)
-  ret void
-}
diff --git a/test/Assembler/private.ll b/test/Assembler/private.ll
deleted file mode 100644
index 3714572..0000000
--- a/test/Assembler/private.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Test to make sure that the 'private' tag is not lost!
-;
-; RUN: llvm-as < %s | llvm-dis | grep private
-
-declare void @foo()
-
-define private void @foo() {
-        ret void
-}
diff --git a/test/Bitcode/AutoUpgradeIntrinsics.ll b/test/Bitcode/AutoUpgradeIntrinsics.ll
deleted file mode 100644
index c3e2e9e..0000000
--- a/test/Bitcode/AutoUpgradeIntrinsics.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; This isn't really an assembly file. It just runs test on bitcode to ensure
-; it is auto-upgraded.
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.ct}
-; CHECK-NOT: {llvm\\.part\\.set\\.i\[0-9\]*\\.i\[0-9\]*\\.i\[0-9\]*}
-; CHECK-NOT: {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
-; CHECK-NOT: {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
-
diff --git a/test/Bitcode/AutoUpgradeIntrinsics.ll.bc b/test/Bitcode/AutoUpgradeIntrinsics.ll.bc
deleted file mode 100644
index 9de756b..0000000
--- a/test/Bitcode/AutoUpgradeIntrinsics.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/memcpy.ll b/test/Bitcode/memcpy.ll
deleted file mode 100644
index 299eb1e..0000000
--- a/test/Bitcode/memcpy.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-define void @test(i32* %P, i32* %Q) {
-entry:
-        %tmp.1 = bitcast i32* %P to i8*         ; <i8*> [#uses=3]
-        %tmp.3 = bitcast i32* %Q to i8*         ; <i8*> [#uses=4]
-        tail call void @llvm.memcpy.i32( i8* %tmp.1, i8* %tmp.3, i32 100000, i32 1 )
-        tail call void @llvm.memcpy.i64( i8* %tmp.1, i8* %tmp.3, i64 100000, i32 1 )
-        tail call void @llvm.memset.i32( i8* %tmp.3, i8 14, i32 10000, i32 0 )
-        tail call void @llvm.memmove.i32( i8* %tmp.1, i8* %tmp.3, i32 123124, i32 1 )
-        tail call void @llvm.memmove.i64( i8* %tmp.1, i8* %tmp.3, i64 123124, i32 1 )
-        ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memmove.i64(i8*, i8*, i64, i32)
diff --git a/test/Bitcode/metadata-2.ll b/test/Bitcode/metadata-2.ll
index 1a59ce6..dbf46b0 100644
--- a/test/Bitcode/metadata-2.ll
+++ b/test/Bitcode/metadata-2.ll
@@ -1,7 +1,7 @@
 ; RUN: llvm-as < %s | llvm-dis -o /dev/null
-	type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* }		; type %0
-	type { i64, %object.ModuleInfo* }		; type %1
-	type { i32, void ()* }		; type %2
+	%0 = type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* }		; type %0
+	%1 = type { i64, %object.ModuleInfo* }		; type %1
+	%2 = type { i32, void ()* }		; type %2
 	%"ClassInfo[]" = type { i64, %object.ClassInfo** }
 	%"Interface[]" = type { i64, %object.Interface* }
 	%"ModuleInfo[]" = type { i64, %object.ModuleInfo** }
diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll
deleted file mode 100644
index feb2d74..0000000
--- a/test/Bitcode/neon-intrinsics.ll
+++ /dev/null
@@ -1,206 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-
-; vmovls should be auto-upgraded to sext
-
-; CHECK: vmovls8
-; CHECK-NOT: arm.neon.vmovls.v8i16
-; CHECK: sext <8 x i8>
-
-; CHECK: vmovls16
-; CHECK-NOT: arm.neon.vmovls.v4i32
-; CHECK: sext <4 x i16>
-
-; CHECK: vmovls32
-; CHECK-NOT: arm.neon.vmovls.v2i64
-; CHECK: sext <2 x i32>
-
-; vmovlu should be auto-upgraded to zext
-
-; CHECK: vmovlu8
-; CHECK-NOT: arm.neon.vmovlu.v8i16
-; CHECK: zext <8 x i8>
-
-; CHECK: vmovlu16
-; CHECK-NOT: arm.neon.vmovlu.v4i32
-; CHECK: zext <4 x i16>
-
-; CHECK: vmovlu32
-; CHECK-NOT: arm.neon.vmovlu.v2i64
-; CHECK: zext <2 x i32>
-
-; vaddl/vaddw should be auto-upgraded to add with sext/zext
-
-; CHECK: vaddls16
-; CHECK-NOT: arm.neon.vaddls.v4i32
-; CHECK: sext <4 x i16>
-; CHECK-NEXT: sext <4 x i16>
-; CHECK-NEXT: add <4 x i32>
-
-; CHECK: vaddlu32
-; CHECK-NOT: arm.neon.vaddlu.v2i64
-; CHECK: zext <2 x i32>
-; CHECK-NEXT: zext <2 x i32>
-; CHECK-NEXT: add <2 x i64>
-
-; CHECK: vaddws8
-; CHECK-NOT: arm.neon.vaddws.v8i16
-; CHECK: sext <8 x i8>
-; CHECK-NEXT: add <8 x i16>
-
-; CHECK: vaddwu16
-; CHECK-NOT: arm.neon.vaddwu.v4i32
-; CHECK: zext <4 x i16>
-; CHECK-NEXT: add <4 x i32>
-
-; vsubl/vsubw should be auto-upgraded to subtract with sext/zext
-
-; CHECK: vsubls16
-; CHECK-NOT: arm.neon.vsubls.v4i32
-; CHECK: sext <4 x i16>
-; CHECK-NEXT: sext <4 x i16>
-; CHECK-NEXT: sub <4 x i32>
-
-; CHECK: vsublu32
-; CHECK-NOT: arm.neon.vsublu.v2i64
-; CHECK: zext <2 x i32>
-; CHECK-NEXT: zext <2 x i32>
-; CHECK-NEXT: sub <2 x i64>
-
-; CHECK: vsubws8
-; CHECK-NOT: arm.neon.vsubws.v8i16
-; CHECK: sext <8 x i8>
-; CHECK-NEXT: sub <8 x i16>
-
-; CHECK: vsubwu16
-; CHECK-NOT: arm.neon.vsubwu.v4i32
-; CHECK: zext <4 x i16>
-; CHECK-NEXT: sub <4 x i32>
-
-; vmull* intrinsics will remain intrinsics
-
-; CHECK: vmulls8
-; CHECK: arm.neon.vmulls.v8i16
-
-; CHECK: vmullu16
-; CHECK: arm.neon.vmullu.v4i32
-
-; CHECK: vmullp8
-; CHECK: arm.neon.vmullp.v8i16
-
-; vmlal should be auto-upgraded to multiply/add with sext/zext
-
-; CHECK: vmlals32
-; CHECK-NOT: arm.neon.vmlals.v2i64
-; CHECK: sext <2 x i32>
-; CHECK-NEXT: sext <2 x i32>
-; CHECK-NEXT: mul <2 x i64>
-; CHECK-NEXT: add <2 x i64>
-
-; CHECK: vmlalu8
-; CHECK-NOT: arm.neon.vmlalu.v8i16
-; CHECK: zext <8 x i8>
-; CHECK-NEXT: zext <8 x i8>
-; CHECK-NEXT: mul <8 x i16>
-; CHECK-NEXT: add <8 x i16>
-
-; vmlsl should be auto-upgraded to multiply/sub with sext/zext
-
-; CHECK: vmlsls16
-; CHECK-NOT: arm.neon.vmlsls.v4i32
-; CHECK: sext <4 x i16>
-; CHECK-NEXT: sext <4 x i16>
-; CHECK-NEXT: mul <4 x i32>
-; CHECK-NEXT: sub <4 x i32>
-
-; CHECK: vmlslu32
-; CHECK-NOT: arm.neon.vmlslu.v2i64
-; CHECK: zext <2 x i32>
-; CHECK-NEXT: zext <2 x i32>
-; CHECK-NEXT: mul <2 x i64>
-; CHECK-NEXT: sub <2 x i64>
-
-; vaba should be auto-upgraded to vabd + add
-
-; CHECK: vabas32
-; CHECK-NOT: arm.neon.vabas.v2i32
-; CHECK: arm.neon.vabds.v2i32
-; CHECK-NEXT: add <2 x i32>
-
-; CHECK: vabaQu8
-; CHECK-NOT: arm.neon.vabau.v16i8
-; CHECK: arm.neon.vabdu.v16i8
-; CHECK-NEXT: add <16 x i8>
-
-; vabal should be auto-upgraded to vabd with zext + add
-
-; CHECK: vabals16
-; CHECK-NOT: arm.neon.vabals.v4i32
-; CHECK: arm.neon.vabds.v4i16
-; CHECK-NEXT: zext <4 x i16>
-; CHECK-NEXT: add <4 x i32>
-
-; CHECK: vabalu32
-; CHECK-NOT: arm.neon.vabalu.v2i64
-; CHECK: arm.neon.vabdu.v2i32
-; CHECK-NEXT: zext <2 x i32>
-; CHECK-NEXT: add <2 x i64>
-
-; vabdl should be auto-upgraded to vabd with zext
-
-; CHECK: vabdls8
-; CHECK-NOT: arm.neon.vabdls.v8i16
-; CHECK: arm.neon.vabds.v8i8
-; CHECK-NEXT: zext <8 x i8>
-
-; CHECK: vabdlu16
-; CHECK-NOT: arm.neon.vabdlu.v4i32
-; CHECK: arm.neon.vabdu.v4i16
-; CHECK-NEXT: zext <4 x i16>
-
-; vmovn should be auto-upgraded to trunc
-
-; CHECK: vmovni16
-; CHECK-NOT: arm.neon.vmovn.v8i8
-; CHECK: trunc <8 x i16>
-
-; CHECK: vmovni32
-; CHECK-NOT: arm.neon.vmovn.v4i16
-; CHECK: trunc <4 x i32>
-
-; CHECK: vmovni64
-; CHECK-NOT: arm.neon.vmovn.v2i32
-; CHECK: trunc <2 x i64>
-
-; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1)
-
-; CHECK: vld1i8
-; CHECK: i32 1
-; CHECK: vld2Qi16
-; CHECK: i32 1
-; CHECK: vld3i32
-; CHECK: i32 1
-; CHECK: vld4Qf
-; CHECK: i32 1
-
-; CHECK: vst1i8
-; CHECK: i32 1
-; CHECK: vst2Qi16
-; CHECK: i32 1
-; CHECK: vst3i32
-; CHECK: i32 1
-; CHECK: vst4Qf
-; CHECK: i32 1
-
-; CHECK: vld2laneQi16
-; CHECK: i32 1
-; CHECK: vld3lanei32
-; CHECK: i32 1
-; CHECK: vld4laneQf
-; CHECK: i32 1
-
-; CHECK: vst2laneQi16
-; CHECK: i32 1
-; CHECK: vst3lanei32
-; CHECK: i32 1
-; CHECK: vst4laneQf
-; CHECK: i32 1
diff --git a/test/Bitcode/neon-intrinsics.ll.bc b/test/Bitcode/neon-intrinsics.ll.bc
deleted file mode 100644
index cabc3c9..0000000
--- a/test/Bitcode/neon-intrinsics.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/sse2_loadl_pd.ll b/test/Bitcode/sse2_loadl_pd.ll
deleted file mode 100644
index 6cb0da5..0000000
--- a/test/Bitcode/sse2_loadl_pd.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.loadl.pd} 
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_loadl_pd.ll.bc b/test/Bitcode/sse2_loadl_pd.ll.bc
deleted file mode 100644
index 402cbe1..0000000
--- a/test/Bitcode/sse2_loadl_pd.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/sse2_movl_dq.ll b/test/Bitcode/sse2_movl_dq.ll
deleted file mode 100644
index 2fc0149..0000000
--- a/test/Bitcode/sse2_movl_dq.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s 
-; CHECK-NOT: {i32 @llvm\\.movl.dq}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_movl_dq.ll.bc b/test/Bitcode/sse2_movl_dq.ll.bc
deleted file mode 100644
index 74d1826..0000000
--- a/test/Bitcode/sse2_movl_dq.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/sse2_movs_d.ll b/test/Bitcode/sse2_movs_d.ll
deleted file mode 100644
index ab82c43..0000000
--- a/test/Bitcode/sse2_movs_d.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.movs.d}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_movs_d.ll.bc b/test/Bitcode/sse2_movs_d.ll.bc
deleted file mode 100644
index 719d529..0000000
--- a/test/Bitcode/sse2_movs_d.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/sse2_punpck_qdq.ll b/test/Bitcode/sse2_punpck_qdq.ll
deleted file mode 100644
index 4c68af5..0000000
--- a/test/Bitcode/sse2_punpck_qdq.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.punpckh.qdq}
-; CHECK-NOT: {i32 @llvm\\.punpckl.qdq}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_punpck_qdq.ll.bc b/test/Bitcode/sse2_punpck_qdq.ll.bc
deleted file mode 100644
index 7c1b7ed..0000000
--- a/test/Bitcode/sse2_punpck_qdq.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/sse2_shuf_pd.ll b/test/Bitcode/sse2_shuf_pd.ll
deleted file mode 100644
index 1ba6a1d..0000000
--- a/test/Bitcode/sse2_shuf_pd.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.shuf.pd}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_shuf_pd.ll.bc b/test/Bitcode/sse2_shuf_pd.ll.bc
deleted file mode 100644
index 832c39e..0000000
--- a/test/Bitcode/sse2_shuf_pd.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/sse2_unpck_pd.ll b/test/Bitcode/sse2_unpck_pd.ll
deleted file mode 100644
index 99b61b6..0000000
--- a/test/Bitcode/sse2_unpck_pd.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.unpckh.pd}
-; CHECK-NOT: {i32 @llvm\\.unpckl.pd}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_unpck_pd.ll.bc b/test/Bitcode/sse2_unpck_pd.ll.bc
deleted file mode 100644
index 4fb829c..0000000
--- a/test/Bitcode/sse2_unpck_pd.ll.bc
+++ /dev/null
diff --git a/test/Bitcode/sse41_pmulld.ll b/test/Bitcode/sse41_pmulld.ll
deleted file mode 100644
index 752786d..0000000
--- a/test/Bitcode/sse41_pmulld.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.pmulld}
-; CHECK: mul
diff --git a/test/Bitcode/sse41_pmulld.ll.bc b/test/Bitcode/sse41_pmulld.ll.bc
deleted file mode 100644
index bd66f0a..0000000
--- a/test/Bitcode/sse41_pmulld.ll.bc
+++ /dev/null
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 82eac60..2db58b9 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -71,8 +71,6 @@ if(PYTHONINTERP_FOUND)
   MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit)
 
   # Configuration-time: See Unit/lit.site.cfg.in
-  set(LLVM_BUILD_MODE "${LLVM_BUILD_MODE}")
-
   set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
   set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR})
   set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s")
@@ -81,10 +79,17 @@ if(PYTHONINTERP_FOUND)
   set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
   set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
 
+  # lit.site.cfg uses the config-time build mode
+  set(LLVM_BUILD_MODE "${LLVM_BUILD_MODE}")
+
   configure_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
     ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
     @ONLY)
+
+  # Unit/lit.site.cfg substitutes the runtime build_mode
+  set(LLVM_BUILD_MODE "%(build_mode)s")
+
   configure_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
     ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
diff --git a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
deleted file mode 100644
index 26864f1..0000000
--- a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s | not grep 1_0
-; This used to create an extra branch to 'entry', LBB1_0.
-
-; ModuleID = 'bug.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-apple-darwin8"
-	%struct.HexxagonMove = type { i8, i8, i32 }
-	%struct.HexxagonMoveList = type { i32, %struct.HexxagonMove* }
-
-define void @_ZN16HexxagonMoveList8sortListEv(%struct.HexxagonMoveList* %this) {
-entry:
-	%tmp51 = getelementptr %struct.HexxagonMoveList* %this, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp2 = getelementptr %struct.HexxagonMoveList* %this, i32 0, i32 1		; <%struct.HexxagonMove**> [#uses=2]
-	br label %bb49
-
-bb1:		; preds = %bb49
-	%tmp3 = load %struct.HexxagonMove** %tmp2		; <%struct.HexxagonMove*> [#uses=5]
-	%tmp6 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 2		; <i32*> [#uses=1]
-	%tmp7 = load i32* %tmp6		; <i32> [#uses=2]
-	%tmp12 = add i32 %i.1, 1		; <i32> [#uses=7]
-	%tmp14 = getelementptr %struct.HexxagonMove* %tmp3, i32 %tmp12, i32 2		; <i32*> [#uses=1]
-	%tmp15 = load i32* %tmp14		; <i32> [#uses=1]
-	%tmp16 = icmp slt i32 %tmp7, %tmp15		; <i1> [#uses=1]
-	br i1 %tmp16, label %cond_true, label %bb49
-
-cond_true:		; preds = %bb1
-	%tmp23.0 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 0		; <i8*> [#uses=2]
-	%tmp67 = load i8* %tmp23.0		; <i8> [#uses=1]
-	%tmp23.1 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 1		; <i8*> [#uses=1]
-	%tmp68 = load i8* %tmp23.1		; <i8> [#uses=1]
-	%tmp3638 = getelementptr %struct.HexxagonMove* %tmp3, i32 %tmp12, i32 0		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32( i8* %tmp23.0, i8* %tmp3638, i32 8, i32 4 )
-	%tmp41 = load %struct.HexxagonMove** %tmp2		; <%struct.HexxagonMove*> [#uses=3]
-	%tmp44.0 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 0		; <i8*> [#uses=1]
-	store i8 %tmp67, i8* %tmp44.0
-	%tmp44.1 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 1		; <i8*> [#uses=1]
-	store i8 %tmp68, i8* %tmp44.1
-	%tmp44.2 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 2		; <i32*> [#uses=1]
-	store i32 %tmp7, i32* %tmp44.2
-	br label %bb49
-
-bb49:		; preds = %bb59, %cond_true, %bb1, %entry
-	%i.1 = phi i32 [ 0, %entry ], [ %tmp12, %bb1 ], [ %tmp12, %cond_true ], [ 0, %bb59 ]		; <i32> [#uses=5]
-	%move.2 = phi i32 [ 0, %entry ], [ 1, %cond_true ], [ %move.2, %bb1 ], [ 0, %bb59 ]		; <i32> [#uses=2]
-	%tmp52 = load i32* %tmp51		; <i32> [#uses=1]
-	%tmp53 = add i32 %tmp52, -1		; <i32> [#uses=1]
-	%tmp55 = icmp sgt i32 %tmp53, %i.1		; <i1> [#uses=1]
-	br i1 %tmp55, label %bb1, label %bb59
-
-bb59:		; preds = %bb49
-	%tmp61 = icmp eq i32 %move.2, 0		; <i1> [#uses=1]
-	br i1 %tmp61, label %return, label %bb49
-
-return:		; preds = %bb59
-	ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index 52937c1..55cea3a 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -12,7 +12,6 @@ entry:
 	%i_addr = alloca i32		; <i32*> [#uses=2]
 	%q_addr = alloca i32		; <i32*> [#uses=2]
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %i, i32* %i_addr
 	store i32 %q, i32* %q_addr
 	%tmp = load i32* %i_addr		; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index c925fa8..4894116 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -12,7 +12,6 @@ entry:
 	%i_addr = alloca i32		; <i32*> [#uses=2]
 	%q_addr = alloca i32		; <i32*> [#uses=2]
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %i, i32* %i_addr
 	store i32 %q, i32* %q_addr
 	%tmp = load i32* %i_addr		; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 9df5af5..acbab8a 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -14,7 +14,6 @@ entry:
 	%i_addr = alloca i32		; <i32*> [#uses=2]
 	%q_addr = alloca i32		; <i32*> [#uses=2]
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %i, i32* %i_addr
 	store i32 %q, i32* %q_addr
 	%tmp = load i32* %i_addr		; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
deleted file mode 100644
index 7ba2a19..0000000
--- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; RUN: llc < %s 
-; PR1424
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-unknown-linux-gnueabi"
-	%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
-	%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
-	%struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 }
-	%struct.AVEvalExpr = type opaque
-	%struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*] }
-	%struct.AVOption = type opaque
-	%struct.AVPaletteControl = type { i32, [256 x i32] }
-	%struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] }
-	%struct.AVRational = type { i32, i32 }
-	%struct.BlockNode = type { i16, i16, i8, [3 x i8], i8, i8 }
-	%struct.DSPContext = type { void (i16*, i8*, i32)*, void (i16*, i8*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, i32 (i16*)*, void (i8*, i8*, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, void (i16*)*, i32 (i8*, i32)*, i32 (i8*, i32)*, [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], i32 (i8*, i16*, i32)*, [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [2 x void (i8*, i8*, i8*, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [8 x void (i8*, i8*, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [10 x void (i8*, i32, i32, i32, i32)*], [10 x void (i8*, i8*, i32, i32, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, [2 x [4 x i32 (i8*, i8*, i8*, i32, i32)*]], void (i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, void (i32*, i32*, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void ([4 x [4 x i16]]*, i8*, [40 x i8]*, [40 x [2 x i16]]*, i32, i32, i32, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32)*, void (float*, float*, i32)*, void (float*, float*, i32)*, void (float*, float*, float*, i32)*, void (float*, float*, float*, float*, i32, i32, i32)*, void (i16*, float*, i32)*, void (i16*)*, void (i16*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, [64 x i8], i32, i32 (i16*, i16*, i16*, i32)*, void (i16*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void ([4 x i16]*)*, void (i32*, i32*, i32*, i32*, i32*, i32*, i32)*, void (i32*, i32)*, void (i8*, i32, i8**, i32, i32, i32, i32, i32, %struct.slice_buffer*, i32, i8*)*, void (i8*, i32, i32)*, [4 x void (i8*, i32, i8*, i32, i32, i32)*], void (i16*)*, void (i16*, i32)*, void (i16*, i32)*, void (i16*, i32)*, void (i8*, i32)*, void (i8*, i32)*, [16 x void (i8*, i8*, i32, i32)*] }
-	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
-	%struct.GetBitContext = type { i8*, i8*, i32*, i32, i32, i32, i32 }
-	%struct.MJpegContext = type opaque
-	%struct.MotionEstContext = type { %struct.AVCodecContext*, i32, [4 x [2 x i32]], [4 x [2 x i32]], i8*, i8*, [2 x i8*], i8*, i32, i32*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x [4 x i8*]], [4 x [4 x i8*]], i32, i32, i32, i32, i32, [4 x void (i8*, i8*, i32, i32)*]*, [4 x void (i8*, i8*, i32, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [4097 x i8]*, i8*, i32 (%struct.MpegEncContext*, i32*, i32*, i32, i32, i32, i32, i32)* }
-	%struct.MpegEncContext = type { %struct.AVCodecContext*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Picture*, %struct.Picture**, %struct.Picture**, i32, i32, [8 x %struct.MpegEncContext*], %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture*, %struct.Picture*, %struct.Picture*, [3 x i8*], [3 x i32], i16*, [3 x i16*], [20 x i16], i32, i32, i8*, i8*, i8*, i8*, i8*, [16 x i16]*, [3 x [16 x i16]*], i32, i8*, i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, [5 x i32], i32, i32, i32, i32, %struct.DSPContext, i32, i32, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i8*], [2 x [2 x i8*]], i32, i32, i32, [2 x [4 x [2 x i32]]], [2 x [2 x i32]], [2 x [2 x [2 x i32]]], i8*, [2 x [64 x i16]], %struct.MotionEstContext, i32, i32, i32, i32, i32, i32, i16*, [6 x i32], [6 x i32], [3 x i8*], i32*, [64 x i16], [64 x i16], [64 x i16], [64 x i16], i32, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i8*, [8 x i32], [64 x i32]*, [64 x i32]*, [2 x [64 x i16]]*, [2 x [64 x i16]]*, [12 x i32], %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, [64 x i32]*, [2 x i32], [64 x i16]*, i8*, i64, i64, i32, i32, %struct.RateControlContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, i32, %struct.GetBitContext, i32, i32, i32, %struct.ParseContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i16, i16, i16, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [2 x i32]], [2 x [2 x i32]], [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, [3 x i32], %struct.MJpegContext*, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [65 x [65 x [2 x i32]]]]*, i32, i32, %struct.GetBitContext, i32, i32, i32, i8*, i32, [2 x [2 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, i32, i32, i32, i8*, i32, [12 x i16*], [64 x i16]*, [8 x [64 x i16]]*, i32 (%struct.MpegEncContext*, [64 x i16]*)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, void (%struct.MpegEncContext*, i16*)* }
-	%struct.ParseContext = type { i8*, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.Picture = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*], [3 x i8*], [2 x [2 x i16]*], i32*, [2 x i32], i32, i32, i32, i32, [2 x [16 x i32]], [2 x i32], i32, i32, i16*, i16*, i8*, i32*, i32 }
-	%struct.Plane = type { i32, i32, [8 x [4 x %struct.SubBand]] }
-	%struct.Predictor = type { double, double, double }
-	%struct.PutBitContext = type { i32, i32, i8*, i8*, i8* }
-	%struct.RangeCoder = type { i32, i32, i32, i32, [256 x i8], [256 x i8], i8*, i8*, i8* }
-	%struct.RateControlContext = type { %struct.FILE*, i32, %struct.RateControlEntry*, double, [5 x %struct.Predictor], double, double, double, double, double, [5 x double], i32, i32, [5 x i64], [5 x i64], [5 x i64], [5 x i64], [5 x i32], i32, i8*, float, i32, %struct.AVEvalExpr* }
-	%struct.RateControlEntry = type { i32, float, i32, i32, i32, i32, i32, i64, i32, float, i32, i32, i32, i32, i32, i32 }
-	%struct.RcOverride = type { i32, i32, i32, float }
-	%struct.ScanTable = type { i8*, [64 x i8], [64 x i8] }
-	%struct.SnowContext = type { %struct.AVCodecContext*, %struct.RangeCoder, %struct.DSPContext, %struct.AVFrame, %struct.AVFrame, %struct.AVFrame, [8 x %struct.AVFrame], %struct.AVFrame, [32 x i8], [4224 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [8 x [2 x i16]*], [8 x i32*], i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x %struct.Plane], %struct.BlockNode*, [1024 x i32], i32, %struct.slice_buffer, %struct.MpegEncContext }
-	%struct.SubBand = type { i32, i32, i32, i32, i32, i32*, i32, i32, i32, %struct.x_and_coeff*, %struct.SubBand*, [519 x [32 x i8]] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
-	%struct.slice_buffer = type { i32**, i32**, i32, i32, i32, i32, i32* }
-	%struct.x_and_coeff = type { i16, i16 }
-
-define fastcc void @iterative_me(%struct.SnowContext* %s) {
-entry:
-	%state = alloca [4224 x i8], align 8		; <[4224 x i8]*> [#uses=0]
-	%best_rd4233 = alloca i32, align 4		; <i32*> [#uses=0]
-	%tmp21 = getelementptr %struct.SnowContext* %s, i32 0, i32 36		; <i32*> [#uses=2]
-	br label %bb4198
-
-bb79:		; preds = %bb4189.preheader
-	br i1 false, label %cond_next239, label %cond_true
-
-cond_true:		; preds = %bb79
-	ret void
-
-cond_next239:		; preds = %bb79
-	%tmp286 = alloca i8, i32 0		; <i8*> [#uses=0]
-	ret void
-
-bb4198:		; preds = %bb4189.preheader, %entry
-	br i1 false, label %bb4189.preheader, label %bb4204
-
-bb4189.preheader:		; preds = %bb4198
-	br i1 false, label %bb79, label %bb4198
-
-bb4204:		; preds = %bb4198
-	br i1 false, label %bb4221, label %cond_next4213
-
-cond_next4213:		; preds = %bb4204
-	ret void
-
-bb4221:		; preds = %bb4204
-	br i1 false, label %bb5242.preheader, label %UnifiedReturnBlock
-
-bb5242.preheader:		; preds = %bb4221
-	br label %bb5242
-
-bb4231:		; preds = %bb5233
-	%tmp4254.sum = add i32 0, 1		; <i32> [#uses=2]
-	br i1 false, label %bb4559, label %cond_next4622
-
-bb4559:		; preds = %bb4231
-	ret void
-
-cond_next4622:		; preds = %bb4231
-	%tmp4637 = load i16* null		; <i16> [#uses=1]
-	%tmp46374638 = sext i16 %tmp4637 to i32		; <i32> [#uses=1]
-	%tmp4642 = load i16* null		; <i16> [#uses=1]
-	%tmp46424643 = sext i16 %tmp4642 to i32		; <i32> [#uses=1]
-	%tmp4648 = load i16* null		; <i16> [#uses=1]
-	%tmp46484649 = sext i16 %tmp4648 to i32		; <i32> [#uses=1]
-	%tmp4653 = getelementptr %struct.BlockNode* null, i32 %tmp4254.sum, i32 0		; <i16*> [#uses=1]
-	%tmp4654 = load i16* %tmp4653		; <i16> [#uses=1]
-	%tmp46544655 = sext i16 %tmp4654 to i32		; <i32> [#uses=1]
-	%tmp4644 = add i32 %tmp46374638, 2		; <i32> [#uses=1]
-	%tmp4650 = add i32 %tmp4644, %tmp46424643		; <i32> [#uses=1]
-	%tmp4656 = add i32 %tmp4650, %tmp46484649		; <i32> [#uses=1]
-	%tmp4657 = add i32 %tmp4656, %tmp46544655		; <i32> [#uses=2]
-	%tmp4658 = ashr i32 %tmp4657, 2		; <i32> [#uses=1]
-	%tmp4662 = load i16* null		; <i16> [#uses=1]
-	%tmp46624663 = sext i16 %tmp4662 to i32		; <i32> [#uses=1]
-	%tmp4672 = getelementptr %struct.BlockNode* null, i32 0, i32 1		; <i16*> [#uses=1]
-	%tmp4673 = load i16* %tmp4672		; <i16> [#uses=1]
-	%tmp46734674 = sext i16 %tmp4673 to i32		; <i32> [#uses=1]
-	%tmp4678 = getelementptr %struct.BlockNode* null, i32 %tmp4254.sum, i32 1		; <i16*> [#uses=1]
-	%tmp4679 = load i16* %tmp4678		; <i16> [#uses=1]
-	%tmp46794680 = sext i16 %tmp4679 to i32		; <i32> [#uses=1]
-	%tmp4669 = add i32 %tmp46624663, 2		; <i32> [#uses=1]
-	%tmp4675 = add i32 %tmp4669, 0		; <i32> [#uses=1]
-	%tmp4681 = add i32 %tmp4675, %tmp46734674		; <i32> [#uses=1]
-	%tmp4682 = add i32 %tmp4681, %tmp46794680		; <i32> [#uses=2]
-	%tmp4683 = ashr i32 %tmp4682, 2		; <i32> [#uses=1]
-	%tmp4703 = load i32* %tmp21		; <i32> [#uses=1]
-	%tmp4707 = shl i32 %tmp4703, 0		; <i32> [#uses=4]
-	%tmp4710 = load %struct.BlockNode** null		; <%struct.BlockNode*> [#uses=6]
-	%tmp4713 = mul i32 %tmp4707, %mb_y.4		; <i32> [#uses=1]
-	%tmp4715 = add i32 %tmp4713, %mb_x.7		; <i32> [#uses=7]
-	store i8 0, i8* null
-	store i8 0, i8* null
-	%tmp47594761 = bitcast %struct.BlockNode* null to i8*		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i32( i8* null, i8* %tmp47594761, i32 10, i32 0 )
-	%tmp4716.sum5775 = add i32 %tmp4715, 1		; <i32> [#uses=1]
-	%tmp4764 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4716.sum5775		; <%struct.BlockNode*> [#uses=1]
-	%tmp47644766 = bitcast %struct.BlockNode* %tmp4764 to i8*		; <i8*> [#uses=1]
-	%tmp4716.sum5774 = add i32 %tmp4715, %tmp4707		; <i32> [#uses=0]
-	%tmp47704772 = bitcast %struct.BlockNode* null to i8*		; <i8*> [#uses=1]
-	%tmp4774 = add i32 %tmp4707, 1		; <i32> [#uses=1]
-	%tmp4716.sum5773 = add i32 %tmp4774, %tmp4715		; <i32> [#uses=1]
-	%tmp4777 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4716.sum5773		; <%struct.BlockNode*> [#uses=1]
-	%tmp47774779 = bitcast %struct.BlockNode* %tmp4777 to i8*		; <i8*> [#uses=1]
-	%tmp4781 = icmp slt i32 %mb_x.7, 0		; <i1> [#uses=1]
-	%tmp4788 = or i1 %tmp4781, %tmp4784		; <i1> [#uses=2]
-	br i1 %tmp4788, label %cond_true4791, label %cond_next4794
-
-cond_true4791:		; preds = %cond_next4622
-	unreachable
-
-cond_next4794:		; preds = %cond_next4622
-	%tmp4797 = icmp slt i32 %mb_x.7, %tmp4707		; <i1> [#uses=1]
-	br i1 %tmp4797, label %cond_next4803, label %cond_true4800
-
-cond_true4800:		; preds = %cond_next4794
-	unreachable
-
-cond_next4803:		; preds = %cond_next4794
-	%tmp4825 = ashr i32 %tmp4657, 12		; <i32> [#uses=1]
-	shl i32 %tmp4682, 4		; <i32>:0 [#uses=1]
-	%tmp4828 = and i32 %0, -64		; <i32> [#uses=1]
-	%tmp4831 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 2		; <i8*> [#uses=0]
-	%tmp4826 = add i32 %tmp4828, %tmp4825		; <i32> [#uses=1]
-	%tmp4829 = add i32 %tmp4826, 0		; <i32> [#uses=1]
-	%tmp4835 = add i32 %tmp4829, 0		; <i32> [#uses=1]
-	store i32 %tmp4835, i32* null
-	%tmp48534854 = trunc i32 %tmp4658 to i16		; <i16> [#uses=1]
-	%tmp4856 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 0		; <i16*> [#uses=1]
-	store i16 %tmp48534854, i16* %tmp4856
-	%tmp48574858 = trunc i32 %tmp4683 to i16		; <i16> [#uses=1]
-	%tmp4860 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 1		; <i16*> [#uses=1]
-	store i16 %tmp48574858, i16* %tmp4860
-	%tmp4866 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 4		; <i8*> [#uses=0]
-	br i1 false, label %bb4933, label %cond_false4906
-
-cond_false4906:		; preds = %cond_next4803
-	call void @llvm.memcpy.i32( i8* %tmp47594761, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp47644766, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp47704772, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp47774779, i8* null, i32 10, i32 0 )
-	br label %bb5215
-
-bb4933:		; preds = %bb5215, %cond_next4803
-	br i1 false, label %cond_true4944, label %bb5215
-
-cond_true4944:		; preds = %bb4933
-	%tmp4982 = load i32* %tmp21		; <i32> [#uses=1]
-	%tmp4986 = shl i32 %tmp4982, 0		; <i32> [#uses=2]
-	%tmp4992 = mul i32 %tmp4986, %mb_y.4		; <i32> [#uses=1]
-	%tmp4994 = add i32 %tmp4992, %mb_x.7		; <i32> [#uses=5]
-	%tmp4995.sum5765 = add i32 %tmp4994, 1		; <i32> [#uses=1]
-	%tmp5043 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5765		; <%struct.BlockNode*> [#uses=1]
-	%tmp50435045 = bitcast %struct.BlockNode* %tmp5043 to i8*		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i32( i8* null, i8* %tmp50435045, i32 10, i32 0 )
-	%tmp4995.sum5764 = add i32 %tmp4994, %tmp4986		; <i32> [#uses=1]
-	%tmp5049 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5764		; <%struct.BlockNode*> [#uses=1]
-	%tmp50495051 = bitcast %struct.BlockNode* %tmp5049 to i8*		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i32( i8* null, i8* %tmp50495051, i32 10, i32 0 )
-	%tmp4995.sum5763 = add i32 0, %tmp4994		; <i32> [#uses=1]
-	%tmp5056 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5763		; <%struct.BlockNode*> [#uses=1]
-	%tmp50565058 = bitcast %struct.BlockNode* %tmp5056 to i8*		; <i8*> [#uses=1]
-	br i1 %tmp4788, label %cond_true5070, label %cond_next5073
-
-cond_true5070:		; preds = %cond_true4944
-	unreachable
-
-cond_next5073:		; preds = %cond_true4944
-	%tmp5139 = getelementptr %struct.BlockNode* null, i32 %tmp4994, i32 1		; <i16*> [#uses=0]
-	%tmp5145 = getelementptr %struct.BlockNode* null, i32 %tmp4994, i32 4		; <i8*> [#uses=0]
-	call void @llvm.memcpy.i32( i8* %tmp50435045, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp50495051, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp50565058, i8* null, i32 10, i32 0 )
-	br label %bb5215
-
-bb5215:		; preds = %cond_next5073, %bb4933, %cond_false4906
-	%i4232.3 = phi i32 [ 0, %cond_false4906 ], [ 0, %cond_next5073 ], [ 0, %bb4933 ]		; <i32> [#uses=1]
-	%tmp5217 = icmp slt i32 %i4232.3, 4		; <i1> [#uses=1]
-	br i1 %tmp5217, label %bb4933, label %bb5220
-
-bb5220:		; preds = %bb5215
-	br i1 false, label %bb5230, label %cond_true5226
-
-cond_true5226:		; preds = %bb5220
-	ret void
-
-bb5230:		; preds = %bb5220
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
-	br label %bb5233
-
-bb5233:		; preds = %bb5233.preheader, %bb5230
-	%indvar = phi i32 [ 0, %bb5233.preheader ], [ %indvar.next, %bb5230 ]		; <i32> [#uses=2]
-	%mb_x.7 = shl i32 %indvar, 1		; <i32> [#uses=4]
-	br i1 false, label %bb4231, label %bb5239
-
-bb5239:		; preds = %bb5233
-	%indvar.next37882 = add i32 %indvar37881, 1		; <i32> [#uses=1]
-	br label %bb5242
-
-bb5242:		; preds = %bb5239, %bb5242.preheader
-	%indvar37881 = phi i32 [ 0, %bb5242.preheader ], [ %indvar.next37882, %bb5239 ]		; <i32> [#uses=2]
-	%mb_y.4 = shl i32 %indvar37881, 1		; <i32> [#uses=3]
-	br i1 false, label %bb5233.preheader, label %bb5248
-
-bb5233.preheader:		; preds = %bb5242
-	%tmp4784 = icmp slt i32 %mb_y.4, 0		; <i1> [#uses=1]
-	br label %bb5233
-
-bb5248:		; preds = %bb5242
-	ret void
-
-UnifiedReturnBlock:		; preds = %bb4221
-	ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index 77418be..c9a8a67 100644
--- a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -188,11 +188,6 @@ bb231:		; preds = %bb226
 	ret void
 }
 
-	%struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
-	%struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] }
-	%struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] }
-	%struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
-
 define fastcc void @outer_loop2(%struct.lame_global_flags* %gfp, double* %xr, i32 %targ_bits, double* %best_noise, %struct.III_psy_xmin* %l3_xmin, i32* %l3_enc, %struct.III_scalefac_t* %scalefac, %struct.gr_info* %cod_info, i32 %ch) {
 entry:
 	%cod_info.20128.1 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 1		; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 0ec17ae..377bbd2 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -1,16 +1,15 @@
 ; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2
 ; rdar://6653182
 
-	%struct.ggBRDF = type { i32 (...)** }
-	%struct.ggPoint2 = type { [2 x double] }
-	%struct.ggPoint3 = type { [3 x double] }
-	%struct.ggSpectrum = type { [8 x float] }
-	%struct.ggSphere = type { %struct.ggPoint3, double }
-	%struct.mrDiffuseAreaSphereLuminaire = type { %struct.mrSphere, %struct.ggSpectrum }
-	%struct.mrDiffuseCosineSphereLuminaire = type { %struct.mrDiffuseAreaSphereLuminaire }
-	%struct.mrSphere = type { %struct.ggBRDF, %struct.ggSphere }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+%struct.ggBRDF = type { i32 (...)** }
+%struct.ggPoint2 = type { [2 x double] }
+%struct.ggPoint3 = type { [3 x double] }
+%struct.ggSpectrum = type { [8 x float] }
+%struct.ggSphere = type { %struct.ggPoint3, double }
+%struct.mrDiffuseAreaSphereLuminaire = type { %struct.mrSphere, %struct.ggSpectrum }
+%struct.mrDiffuseCosineSphereLuminaire = type { %struct.mrDiffuseAreaSphereLuminaire }
+%struct.mrSphere = type { %struct.ggBRDF, %struct.ggSphere }
 
 declare double @llvm.sqrt.f64(double) nounwind readonly
 
@@ -20,59 +19,61 @@ declare double @acos(double) nounwind readonly
 
 define i32 @_ZNK34mrDiffuseSolidAngleSphereLuminaire18selectVisiblePointERK8ggPoint3RK9ggVector3RK8ggPoint2dRS0_Rd(%struct.mrDiffuseCosineSphereLuminaire* nocapture %this, %struct.ggPoint3* nocapture %x, %struct.ggPoint3* nocapture %unnamed_arg, %struct.ggPoint2* nocapture %uv, double %unnamed_arg2, %struct.ggPoint3* nocapture %on_light, double* nocapture %invProb) nounwind {
 entry:
-	%0 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind		; <double> [#uses=4]
-	%1 = fcmp ult double 0.000000e+00, %0		; <i1> [#uses=1]
-	br i1 %1, label %bb3, label %bb7
+  %0 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind
+  %1 = fcmp ult double 0.000000e+00, %0
+  br i1 %1, label %bb3, label %bb7
 
-bb3:		; preds = %entry
-	%2 = fdiv double 1.000000e+00, 0.000000e+00		; <double> [#uses=1]
-	%3 = fmul double 0.000000e+00, %2		; <double> [#uses=2]
-	%4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind		; <double> [#uses=1]
-	%5 = fdiv double 1.000000e+00, %4		; <double> [#uses=2]
-	%6 = fmul double %3, %5		; <double> [#uses=2]
-	%7 = fmul double 0.000000e+00, %5		; <double> [#uses=2]
-	%8 = fmul double %3, %7		; <double> [#uses=1]
-	%9 = fsub double %8, 0.000000e+00		; <double> [#uses=1]
-	%10 = fmul double 0.000000e+00, %6		; <double> [#uses=1]
-	%11 = fsub double 0.000000e+00, %10		; <double> [#uses=1]
-	%12 = fsub double -0.000000e+00, %11		; <double> [#uses=1]
-	%13 = fmul double %0, %0		; <double> [#uses=2]
-	%14 = fsub double %13, 0.000000e+00		; <double> [#uses=1]
-	%15 = call double @llvm.sqrt.f64(double %14)		; <double> [#uses=1]
-	%16 = fmul double 0.000000e+00, %15		; <double> [#uses=1]
-	%17 = fdiv double %16, %0		; <double> [#uses=1]
-	%18 = fadd double 0.000000e+00, %17		; <double> [#uses=1]
-	%19 = call double @acos(double %18) nounwind readonly		; <double> [#uses=1]
-	%20 = load double* null, align 4		; <double> [#uses=1]
-	%21 = fmul double %20, 0x401921FB54442D18		; <double> [#uses=1]
-	%22 = call double @sin(double %19) nounwind readonly		; <double> [#uses=2]
-	%23 = fmul double %22, 0.000000e+00		; <double> [#uses=2]
-	%24 = fmul double %6, %23		; <double> [#uses=1]
-	%25 = fmul double %7, %23		; <double> [#uses=1]
-	%26 = call double @sin(double %21) nounwind readonly		; <double> [#uses=1]
-	%27 = fmul double %22, %26		; <double> [#uses=2]
-	%28 = fmul double %9, %27		; <double> [#uses=1]
-	%29 = fmul double %27, %12		; <double> [#uses=1]
-	%30 = fadd double %24, %28		; <double> [#uses=1]
-	%31 = fadd double 0.000000e+00, %29		; <double> [#uses=1]
-	%32 = fadd double %25, 0.000000e+00		; <double> [#uses=1]
-	%33 = fadd double %30, 0.000000e+00		; <double> [#uses=1]
-	%34 = fadd double %31, 0.000000e+00		; <double> [#uses=1]
-	%35 = fadd double %32, 0.000000e+00		; <double> [#uses=1]
-	%36 = bitcast %struct.ggPoint3* %x to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32(i8* null, i8* %36, i32 24, i32 4) nounwind
-	store double %33, double* null, align 8
-	br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i
+bb3:                                              ; preds = %entry
+  %2 = fdiv double 1.000000e+00, 0.000000e+00
+  %3 = fmul double 0.000000e+00, %2
+  %4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind
+  %5 = fdiv double 1.000000e+00, %4
+  %6 = fmul double %3, %5
+  %7 = fmul double 0.000000e+00, %5
+  %8 = fmul double %3, %7
+  %9 = fsub double %8, 0.000000e+00
+  %10 = fmul double 0.000000e+00, %6
+  %11 = fsub double 0.000000e+00, %10
+  %12 = fsub double -0.000000e+00, %11
+  %13 = fmul double %0, %0
+  %14 = fsub double %13, 0.000000e+00
+  %15 = call double @llvm.sqrt.f64(double %14)
+  %16 = fmul double 0.000000e+00, %15
+  %17 = fdiv double %16, %0
+  %18 = fadd double 0.000000e+00, %17
+  %19 = call double @acos(double %18) nounwind readonly
+  %20 = load double* null, align 4
+  %21 = fmul double %20, 0x401921FB54442D18
+  %22 = call double @sin(double %19) nounwind readonly
+  %23 = fmul double %22, 0.000000e+00
+  %24 = fmul double %6, %23
+  %25 = fmul double %7, %23
+  %26 = call double @sin(double %21) nounwind readonly
+  %27 = fmul double %22, %26
+  %28 = fmul double %9, %27
+  %29 = fmul double %27, %12
+  %30 = fadd double %24, %28
+  %31 = fadd double 0.000000e+00, %29
+  %32 = fadd double %25, 0.000000e+00
+  %33 = fadd double %30, 0.000000e+00
+  %34 = fadd double %31, 0.000000e+00
+  %35 = fadd double %32, 0.000000e+00
+  %36 = bitcast %struct.ggPoint3* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false)
+  store double %33, double* null, align 8
+  br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i
 
-bb5.i.i.i:		; preds = %bb3
-	unreachable
+bb5.i.i.i:                                        ; preds = %bb3
+  unreachable
 
-_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit:		; preds = %bb3
-	%37 = fsub double %13, 0.000000e+00		; <double> [#uses=0]
-	%38 = fsub double -0.000000e+00, %34		; <double> [#uses=0]
-	%39 = fsub double -0.000000e+00, %35		; <double> [#uses=0]
-	ret i32 1
+_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit: ; preds = %bb3
+  %37 = fsub double %13, 0.000000e+00
+  %38 = fsub double -0.000000e+00, %34
+  %39 = fsub double -0.000000e+00, %35
+  ret i32 1
 
-bb7:		; preds = %entry
-	ret i32 0
+bb7:                                              ; preds = %entry
+  ret i32 0
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
deleted file mode 100644
index 27888d7..0000000
--- a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin
-
-	type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] }		; type %0
-	type { i32, %struct.D_Reduction** }		; type %1
-	type { i32, %struct.D_RightEpsilonHint* }		; type %2
-	type { i32, %struct.D_ErrorRecoveryHint* }		; type %3
-	type { i32, i32, %struct.D_Reduction**, [3 x %struct.D_Reduction*] }		; type %4
-	%struct.D_ErrorRecoveryHint = type { i16, i16, i8* }
-	%struct.D_ParseNode = type { i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i8*, i8* }
-	%struct.D_Parser = type { i8*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, %struct.D_Scope*, void (%struct.D_Parser*)*, %struct.D_ParseNode* (%struct.D_Parser*, i32, %struct.D_ParseNode**)*, void (%struct.D_ParseNode*)*, %struct.d_loc_t, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.D_ParserTables = type { i32, %struct.D_State*, i16*, i32, i32, %struct.D_Symbol*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i32, %struct.D_Pass*, i32 }
-	%struct.D_Pass = type { i8*, i32, i32, i32 }
-	%struct.D_Reduction = type { i16, i16, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i16, i16, i32, i32, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)** }
-	%struct.D_RightEpsilonHint = type { i16, i16, %struct.D_Reduction* }
-	%struct.D_Scope = type { i8, %struct.D_Sym*, %struct.D_SymHash*, %struct.D_Sym*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope* }
-	%struct.D_Shift = type { i16, i8, i8, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)* }
-	%struct.D_State = type { i8*, i32, %1, %2, %3, %struct.D_Shift**, i32 (i8**, i32*, i32*, i16*, i32*, i8*, i32*)*, i8*, i8, i8, i8, i8*, %struct.D_Shift***, i32 }
-	%struct.D_Sym = type { i8*, i32, i32, %struct.D_Sym*, %struct.D_Sym*, i32 }
-	%struct.D_SymHash = type { i32, i32, %0 }
-	%struct.D_Symbol = type { i32, i8*, i32 }
-	%struct.PNode = type { i32, i32, i32, i32, %struct.D_Reduction*, %struct.D_Shift*, i32, %struct.VecPNode, i32, i8, i8, %struct.PNode*, %struct.PNode*, %struct.PNode*, %struct.PNode*, i8*, i8*, %struct.D_Scope*, i8*, %struct.D_ParseNode }
-	%struct.PNodeHash = type { %struct.PNode**, i32, i32, i32, %struct.PNode* }
-	%struct.Parser = type { %struct.D_Parser, i8*, i8*, %struct.D_ParserTables*, i32, i32, i32, i32, i32, i32, i32, %struct.PNodeHash, %struct.SNodeHash, %struct.Reduction*, %struct.Shift*, %struct.D_Scope*, %struct.SNode*, i32, %struct.Reduction*, %struct.Shift*, i32, %struct.PNode*, %struct.SNode*, %struct.ZNode*, %4, %struct.ShiftResult*, %struct.D_Shift, %struct.Parser*, i8* }
-	%struct.Reduction = type { %struct.ZNode*, %struct.SNode*, %struct.D_Reduction*, %struct.SNode*, i32, %struct.Reduction* }
-	%struct.SNode = type { %struct.D_State*, %struct.D_Scope*, i8*, %struct.d_loc_t, i32, %struct.PNode*, %struct.VecZNode, i32, %struct.SNode*, %struct.SNode* }
-	%struct.SNodeHash = type { %struct.SNode**, i32, i32, i32, %struct.SNode*, %struct.SNode* }
-	%struct.Shift = type { %struct.SNode*, %struct.Shift* }
-	%struct.ShiftResult = type { %struct.D_Shift*, %struct.d_loc_t }
-	%struct.VecPNode = type { i32, i32, %struct.PNode**, [3 x %struct.PNode*] }
-	%struct.VecSNode = type { i32, i32, %struct.SNode**, [3 x %struct.SNode*] }
-	%struct.VecZNode = type { i32, i32, %struct.ZNode**, [3 x %struct.ZNode*] }
-	%struct.ZNode = type { %struct.PNode*, %struct.VecSNode }
-	%struct.d_loc_t = type { i8*, i8*, i32, i32, i32 }
-
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
-define fastcc i32 @exhaustive_parse(%struct.Parser* %p, i32 %state) nounwind {
-entry:
-	store i8* undef, i8** undef, align 4
-	%0 = getelementptr %struct.Parser* %p, i32 0, i32 0, i32 6		; <%struct.d_loc_t*> [#uses=1]
-	%1 = bitcast %struct.d_loc_t* %0 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32(i8* undef, i8* %1, i32 20, i32 4)
-	br label %bb10
-
-bb10:		; preds = %bb30, %bb29, %bb26, %entry
-	br i1 undef, label %bb18, label %bb20
-
-bb18:		; preds = %bb10
-	br i1 undef, label %bb20, label %bb19
-
-bb19:		; preds = %bb18
-	br label %bb20
-
-bb20:		; preds = %bb19, %bb18, %bb10
-	br i1 undef, label %bb21, label %bb22
-
-bb21:		; preds = %bb20
-	unreachable
-
-bb22:		; preds = %bb20
-	br i1 undef, label %bb24, label %bb26
-
-bb24:		; preds = %bb22
-	unreachable
-
-bb26:		; preds = %bb22
-	br i1 undef, label %bb10, label %bb29
-
-bb29:		; preds = %bb26
-	br i1 undef, label %bb10, label %bb30
-
-bb30:		; preds = %bb29
-	br i1 undef, label %bb31, label %bb10
-
-bb31:		; preds = %bb30
-	unreachable
-}
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 397eba4..8bde748 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -1,32 +1,35 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s
 
+; CHECK: L_LSDA_0:
+
+
 %struct.A = type { i32* }
 
 define void @"\01-[MyFunction Name:]"() {
 entry:
-  %save_filt.1 = alloca i32                       ; <i32*> [#uses=2]
-  %save_eptr.0 = alloca i8*                       ; <i8**> [#uses=2]
-  %a = alloca %struct.A                           ; <%struct.A*> [#uses=3]
-  %eh_exception = alloca i8*                      ; <i8**> [#uses=5]
-  %eh_selector = alloca i32                       ; <i32*> [#uses=3]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call  void @_ZN1AC1Ev(%struct.A* %a)
-  invoke  void @_Z3barv()
+  %save_filt.1 = alloca i32
+  %save_eptr.0 = alloca i8*
+  %a = alloca %struct.A
+  %eh_exception = alloca i8*
+  %eh_selector = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  call void @_ZN1AC1Ev(%struct.A* %a)
+  invoke void @_Z3barv()
           to label %invcont unwind label %lpad
 
 invcont:                                          ; preds = %entry
-  call  void @_ZN1AD1Ev(%struct.A* %a) nounwind
+  call void @_ZN1AD1Ev(%struct.A* %a) nounwind
   br label %return
 
 bb:                                               ; preds = %ppad
-  %eh_select = load i32* %eh_selector             ; <i32> [#uses=1]
+  %eh_select = load i32* %eh_selector
   store i32 %eh_select, i32* %save_filt.1, align 4
-  %eh_value = load i8** %eh_exception             ; <i8*> [#uses=1]
+  %eh_value = load i8** %eh_exception
   store i8* %eh_value, i8** %save_eptr.0, align 4
-  call  void @_ZN1AD1Ev(%struct.A* %a) nounwind
-  %0 = load i8** %save_eptr.0, align 4            ; <i8*> [#uses=1]
+  call void @_ZN1AD1Ev(%struct.A* %a) nounwind
+  %0 = load i8** %save_eptr.0, align 4
   store i8* %0, i8** %eh_exception, align 4
-  %1 = load i32* %save_filt.1, align 4            ; <i32> [#uses=1]
+  %1 = load i32* %save_filt.1, align 4
   store i32 %1, i32* %eh_selector, align 4
   br label %Unwind
 
@@ -34,10 +37,10 @@ return:                                           ; preds = %invcont
   ret void
 
 lpad:                                             ; preds = %entry
-  %eh_ptr = call i8* @llvm.eh.exception()         ; <i8*> [#uses=1]
+  %eh_ptr = call i8* @llvm.eh.exception()
   store i8* %eh_ptr, i8** %eh_exception
-  %eh_ptr1 = load i8** %eh_exception              ; <i8*> [#uses=1]
-  %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) ; <i32> [#uses=1]
+  %eh_ptr1 = load i8** %eh_exception
+  %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
   store i32 %eh_select2, i32* %eh_selector
   br label %ppad
 
@@ -45,20 +48,20 @@ ppad:                                             ; preds = %lpad
   br label %bb
 
 Unwind:                                           ; preds = %bb
-  %eh_ptr3 = load i8** %eh_exception              ; <i8*> [#uses=1]
-  call  void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
+  %eh_ptr3 = load i8** %eh_exception
+  call void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
   unreachable
 }
 
 define linkonce_odr void @_ZN1AC1Ev(%struct.A* %this) {
 entry:
-  %this_addr = alloca %struct.A*                  ; <%struct.A**> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %this_addr = alloca %struct.A*
+  %"alloca point" = bitcast i32 0 to i32
   store %struct.A* %this, %struct.A** %this_addr
-  %0 = call  i8* @_Znwm(i32 4)         ; <i8*> [#uses=1]
-  %1 = bitcast i8* %0 to i32*                     ; <i32*> [#uses=1]
-  %2 = load %struct.A** %this_addr, align 4       ; <%struct.A*> [#uses=1]
-  %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; <i32**> [#uses=1]
+  %0 = call i8* @_Znwm(i32 4)
+  %1 = bitcast i8* %0 to i32*
+  %2 = load %struct.A** %this_addr, align 4
+  %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0
   store i32* %1, i32** %3, align 4
   br label %return
 
@@ -70,14 +73,14 @@ declare i8* @_Znwm(i32)
 
 define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind {
 entry:
-  %this_addr = alloca %struct.A*                  ; <%struct.A**> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %this_addr = alloca %struct.A*
+  %"alloca point" = bitcast i32 0 to i32
   store %struct.A* %this, %struct.A** %this_addr
-  %0 = load %struct.A** %this_addr, align 4       ; <%struct.A*> [#uses=1]
-  %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; <i32**> [#uses=1]
-  %2 = load i32** %1, align 4                     ; <i32*> [#uses=1]
-  %3 = bitcast i32* %2 to i8*                     ; <i8*> [#uses=1]
-  call  void @_ZdlPv(i8* %3) nounwind
+  %0 = load %struct.A** %this_addr, align 4
+  %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0
+  %2 = load i32** %1, align 4
+  %3 = bitcast i32* %2 to i8*
+  call void @_ZdlPv(i8* %3) nounwind
   br label %bb
 
 bb:                                               ; preds = %entry
@@ -86,17 +89,16 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %bb
   ret void
 }
-;CHECK: L_LSDA_0:
 
 declare void @_ZdlPv(i8*) nounwind
 
 declare void @_Z3barv()
 
-declare i8* @llvm.eh.exception() nounwind
+declare i8* @llvm.eh.exception() nounwind readonly
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
 
-declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gxx_personality_sj0(...)
 
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index 7893df7..7b6c9d4 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -disable-cgp-branch-opts | FileCheck %s
+; RUN: llc < %s -mtriple=armv4t-unknown-linux-gnueabi -disable-cgp-branch-opts | FileCheck %s
 
 define i32 @f1() {
 ; CHECK: f1
@@ -44,3 +44,16 @@ define void @f7(i32 %a) {
 r:
         ret void
 }
+
+%t1 = type { <3 x float>, <3 x float> }
+
+@const1 = global %t1 { <3 x float> zeroinitializer,
+                       <3 x float> <float 1.000000e+00,
+                                    float 2.000000e+00,
+                                    float 3.000000e+00> }, align 16
+; CHECK: const1
+; CHECK: .zero 16
+; CHECK: float 1.0
+; CHECK: float 2.0
+; CHECK: float 3.0
+; CHECK: .zero 4
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
new file mode 100644
index 0000000..519c40e
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -0,0 +1,245 @@
+; RUN: llc -O0 < %s | FileCheck %s
+; CHECK: @DEBUG_VALUE: mydata <- [sp+#8]+#0
+; Radar 9331779
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.7.0"
+
+%0 = type opaque
+%1 = type { [4 x i32] }
+%2 = type <{ i8*, i32, i32, i8*, %struct.Re*, i8*, %3*, %struct.my_struct* }>
+%3 = type opaque
+%struct.CP = type { float, float }
+%struct.CR = type { %struct.CP, %struct.CP }
+%struct.Re = type { i32, i32 }
+%struct.__block_byref_mydata = type { i8*, %struct.__block_byref_mydata*, i32, i32, i8*, i8*, %0* }
+%struct.my_struct = type opaque
+
+@"\01L_OBJC_SELECTOR_REFERENCES_13" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"OBJC_IVAR_$_MyWork._bounds" = external hidden global i32, section "__DATA, __objc_const", align 4
+@"OBJC_IVAR_$_MyWork._data" = external hidden global i32, section "__DATA, __objc_const", align 4
+@"\01L_OBJC_SELECTOR_REFERENCES_222" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp {
+  %1 = alloca %0*, align 4
+  %bounds = alloca %struct.CR, align 4
+  %data = alloca %struct.CR, align 4
+  call void @llvm.dbg.value(metadata !{i8* %.block_descriptor}, i64 0, metadata !27), !dbg !129
+  store %0* %loadedMydata, %0** %1, align 4
+  call void @llvm.dbg.declare(metadata !{%0** %1}, metadata !130), !dbg !131
+  %2 = bitcast %struct.CR* %bounds to %1*
+  %3 = getelementptr %1* %2, i32 0, i32 0
+  store [4 x i32] %bounds.coerce0, [4 x i32]* %3
+  call void @llvm.dbg.declare(metadata !{%struct.CR* %bounds}, metadata !132), !dbg !133
+  %4 = bitcast %struct.CR* %data to %1*
+  %5 = getelementptr %1* %4, i32 0, i32 0
+  store [4 x i32] %data.coerce0, [4 x i32]* %5
+  call void @llvm.dbg.declare(metadata !{%struct.CR* %data}, metadata !134), !dbg !135
+  %6 = bitcast i8* %.block_descriptor to %2*
+  %7 = getelementptr inbounds %2* %6, i32 0, i32 6
+  call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !136), !dbg !137
+  call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !138), !dbg !137
+  call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !139), !dbg !140
+  %8 = load %0** %1, align 4, !dbg !141
+  %9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
+  %10 = bitcast %0* %8 to i8*, !dbg !141
+  %11 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %10, i8* %9), !dbg !141
+  %12 = bitcast i8* %11 to %0*, !dbg !141
+  %13 = getelementptr inbounds %2* %6, i32 0, i32 5, !dbg !141
+  %14 = load i8** %13, !dbg !141
+  %15 = bitcast i8* %14 to %struct.__block_byref_mydata*, !dbg !141
+  %16 = getelementptr inbounds %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141
+  %17 = load %struct.__block_byref_mydata** %16, !dbg !141
+  %18 = getelementptr inbounds %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141
+  store %0* %12, %0** %18, align 4, !dbg !141
+  %19 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !143
+  %20 = load %3** %19, align 4, !dbg !143
+  %21 = load i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143
+  %22 = bitcast %3* %20 to i8*, !dbg !143
+  %23 = getelementptr inbounds i8* %22, i32 %21, !dbg !143
+  %24 = bitcast i8* %23 to %struct.CR*, !dbg !143
+  %25 = bitcast %struct.CR* %24 to i8*, !dbg !143
+  %26 = bitcast %struct.CR* %data to i8*, !dbg !143
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143
+  %27 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !144
+  %28 = load %3** %27, align 4, !dbg !144
+  %29 = load i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
+  %30 = bitcast %3* %28 to i8*, !dbg !144
+  %31 = getelementptr inbounds i8* %30, i32 %29, !dbg !144
+  %32 = bitcast i8* %31 to %struct.CR*, !dbg !144
+  %33 = bitcast %struct.CR* %32 to i8*, !dbg !144
+  %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144
+  %35 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !145
+  %36 = load %3** %35, align 4, !dbg !145
+  %37 = getelementptr inbounds %2* %6, i32 0, i32 5, !dbg !145
+  %38 = load i8** %37, !dbg !145
+  %39 = bitcast i8* %38 to %struct.__block_byref_mydata*, !dbg !145
+  %40 = getelementptr inbounds %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145
+  %41 = load %struct.__block_byref_mydata** %40, !dbg !145
+  %42 = getelementptr inbounds %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145
+  %43 = load %0** %42, align 4, !dbg !145
+  %44 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145
+  %45 = bitcast %3* %36 to i8*, !dbg !145
+  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*)*)(i8* %45, i8* %44, %0* %43), !dbg !145
+  ret void, !dbg !146
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.enum = !{!1, !1, !5, !5, !9, !14, !19, !19, !14, !14, !14, !19, !19, !19}
+!llvm.dbg.sp = !{!23}
+
+!0 = metadata !{i32 589841, i32 0, i32 16, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !"Apple clang version 2.1", i1 true, i1 false, metadata !"", i32 2} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589828, metadata !0, metadata !"", metadata !2, i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!2 = metadata !{i32 589865, metadata !"header.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 589864, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ]
+!5 = metadata !{i32 589828, metadata !0, metadata !"Mode", metadata !6, i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!6 = metadata !{i32 589865, metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 589864, metadata !"One", i64 0} ; [ DW_TAG_enumerator ]
+!9 = metadata !{i32 589828, metadata !0, metadata !"", metadata !10, i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!10 = metadata !{i32 589865, metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!11 = metadata !{metadata !12, metadata !13}
+!12 = metadata !{i32 589864, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ]
+!13 = metadata !{i32 589864, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ]
+!14 = metadata !{i32 589828, metadata !0, metadata !"", metadata !15, i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!15 = metadata !{i32 589865, metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!16 = metadata !{metadata !17, metadata !18}
+!17 = metadata !{i32 589864, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ]
+!18 = metadata !{i32 589864, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ]
+!19 = metadata !{i32 589828, metadata !0, metadata !"", metadata !20, i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!20 = metadata !{i32 589865, metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 589864, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ]
+!23 = metadata !{i32 589870, i32 0, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 589865, metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!25 = metadata !{i32 589845, metadata !24, metadata !"", metadata !24, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!26 = metadata !{null}
+!27 = metadata !{i32 590081, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64} ; [ DW_TAG_arg_variable ]
+!28 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ]
+!29 = metadata !{i32 589843, metadata !24, metadata !"__block_literal_14", metadata !24, i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124}
+!31 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
+!32 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ]
+!34 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!35 = metadata !{i32 589837, metadata !24, metadata !"__reserved", metadata !24, i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
+!36 = metadata !{i32 589837, metadata !24, metadata !"__FuncPtr", metadata !24, i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ]
+!37 = metadata !{i32 589837, metadata !24, metadata !"__descriptor", metadata !24, i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ]
+!39 = metadata !{i32 589843, metadata !0, metadata !"__block_descriptor_withcopydispose", metadata !40, i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!40 = metadata !{i32 589865, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47}
+!42 = metadata !{i32 589837, metadata !40, metadata !"reserved", metadata !40, i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ]
+!43 = metadata !{i32 589860, metadata !0, metadata !"long unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!44 = metadata !{i32 589837, metadata !40, metadata !"Size", metadata !40, i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ]
+!45 = metadata !{i32 589837, metadata !40, metadata !"CopyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ]
+!46 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!47 = metadata !{i32 589837, metadata !40, metadata !"DestroyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ]
+!48 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ]
+!49 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ]
+!50 = metadata !{i32 589843, metadata !24, metadata !"", metadata !24, i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58}
+!52 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
+!53 = metadata !{i32 589837, metadata !24, metadata !"__forwarding", metadata !24, i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ]
+!54 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
+!55 = metadata !{i32 589837, metadata !24, metadata !"__size", metadata !24, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ]
+!56 = metadata !{i32 589837, metadata !24, metadata !"__copy_helper", metadata !24, i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ]
+!57 = metadata !{i32 589837, metadata !24, metadata !"__destroy_helper", metadata !24, i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ]
+!58 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ]
+!59 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ]
+!60 = metadata !{i32 589843, metadata !24, metadata !"UIMydata", metadata !61, i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!61 = metadata !{i32 589865, metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79}
+!63 = metadata !{i32 589852, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!64 = metadata !{i32 589843, metadata !40, metadata !"NSO", metadata !65, i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!65 = metadata !{i32 589865, metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!66 = metadata !{metadata !67}
+!67 = metadata !{i32 589837, metadata !65, metadata !"isa", metadata !65, i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!68 = metadata !{i32 589846, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ]
+!69 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
+!70 = metadata !{i32 589843, metadata !0, metadata !"objc_class", metadata !40, i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!71 = metadata !{i32 589837, metadata !61, metadata !"_mydataRef", metadata !61, i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!72 = metadata !{i32 589846, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ]
+!73 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ]
+!74 = metadata !{i32 589862, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ]
+!75 = metadata !{i32 589837, metadata !61, metadata !"_scale", metadata !61, i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!76 = metadata !{i32 589846, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ]
+!77 = metadata !{i32 589865, metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!78 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!79 = metadata !{i32 589837, metadata !61, metadata !"_mydataFlags", metadata !61, i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!80 = metadata !{i32 589843, metadata !0, metadata !"", metadata !61, i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88}
+!82 = metadata !{i32 589837, metadata !61, metadata !"named", metadata !61, i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ]
+!83 = metadata !{i32 589860, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!84 = metadata !{i32 589837, metadata !61, metadata !"mydataO", metadata !61, i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ]
+!85 = metadata !{i32 589837, metadata !61, metadata !"cached", metadata !61, i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ]
+!86 = metadata !{i32 589837, metadata !61, metadata !"hasBeenCached", metadata !61, i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ]
+!87 = metadata !{i32 589837, metadata !61, metadata !"hasPattern", metadata !61, i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ]
+!88 = metadata !{i32 589837, metadata !61, metadata !"isCIMydata", metadata !61, i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ]
+!89 = metadata !{i32 589837, metadata !24, metadata !"self", metadata !24, i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ]
+!90 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ]
+!91 = metadata !{i32 589843, metadata !40, metadata !"MyWork", metadata !24, i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123}
+!93 = metadata !{i32 589852, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ]
+!94 = metadata !{i32 589843, metadata !40, metadata !"twork", metadata !95, i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!95 = metadata !{i32 589865, metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!96 = metadata !{metadata !97}
+!97 = metadata !{i32 589852, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!98 = metadata !{i32 589837, metadata !24, metadata !"_itemID", metadata !24, i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!99 = metadata !{i32 589846, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ]
+!100 = metadata !{i32 589860, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!101 = metadata !{i32 589837, metadata !24, metadata !"_library", metadata !24, i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!102 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ]
+!103 = metadata !{i32 589843, metadata !40, metadata !"MyLibrary2", metadata !104, i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!104 = metadata !{i32 589865, metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!105 = metadata !{metadata !106}
+!106 = metadata !{i32 589852, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!107 = metadata !{i32 589837, metadata !24, metadata !"_bounds", metadata !24, i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!108 = metadata !{i32 589846, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ]
+!109 = metadata !{i32 589843, metadata !0, metadata !"CR", metadata !77, i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!110 = metadata !{metadata !111, metadata !117}
+!111 = metadata !{i32 589837, metadata !77, metadata !"origin", metadata !77, i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ]
+!112 = metadata !{i32 589846, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ]
+!113 = metadata !{i32 589843, metadata !0, metadata !"CP", metadata !77, i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!114 = metadata !{metadata !115, metadata !116}
+!115 = metadata !{i32 589837, metadata !77, metadata !"x", metadata !77, i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
+!116 = metadata !{i32 589837, metadata !77, metadata !"y", metadata !77, i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
+!117 = metadata !{i32 589837, metadata !77, metadata !"size", metadata !77, i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ]
+!118 = metadata !{i32 589846, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ]
+!119 = metadata !{i32 589843, metadata !0, metadata !"Size", metadata !77, i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!120 = metadata !{metadata !121, metadata !122}
+!121 = metadata !{i32 589837, metadata !77, metadata !"width", metadata !77, i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
+!122 = metadata !{i32 589837, metadata !77, metadata !"height", metadata !77, i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
+!123 = metadata !{i32 589837, metadata !24, metadata !"_data", metadata !24, i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!124 = metadata !{i32 589837, metadata !24, metadata !"semi", metadata !24, i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ]
+!125 = metadata !{i32 589846, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ]
+!126 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ]
+!127 = metadata !{i32 589843, metadata !0, metadata !"my_struct", metadata !128, i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!128 = metadata !{i32 589865, metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!129 = metadata !{i32 609, i32 144, metadata !23, null}
+!130 = metadata !{i32 590081, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0} ; [ DW_TAG_arg_variable ]
+!131 = metadata !{i32 609, i32 155, metadata !23, null}
+!132 = metadata !{i32 590081, metadata !23, metadata !"bounds", metadata !24, i32 50332257, metadata !108, i32 0} ; [ DW_TAG_arg_variable ]
+!133 = metadata !{i32 609, i32 175, metadata !23, null}
+!134 = metadata !{i32 590081, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0} ; [ DW_TAG_arg_variable ]
+!135 = metadata !{i32 609, i32 190, metadata !23, null}
+!136 = metadata !{i32 590080, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
+!137 = metadata !{i32 604, i32 49, metadata !23, null}
+!138 = metadata !{i32 590080, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
+!139 = metadata !{i32 590080, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, i64 1, i64 28} ; [ DW_TAG_auto_variable ]
+!140 = metadata !{i32 607, i32 30, metadata !23, null}
+!141 = metadata !{i32 610, i32 17, metadata !142, null}
+!142 = metadata !{i32 589835, metadata !23, i32 609, i32 200, metadata !24, i32 94} ; [ DW_TAG_lexical_block ]
+!143 = metadata !{i32 611, i32 17, metadata !142, null}
+!144 = metadata !{i32 612, i32 17, metadata !142, null}
+!145 = metadata !{i32 613, i32 17, metadata !142, null}
+!146 = metadata !{i32 615, i32 13, metadata !142, null}
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index 92e2d13..de2820e 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,48 +1,48 @@
 ; RUN: llc < %s -march=arm
 
-	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
-	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+%struct.comment = type { i8**, i32*, i32, i8* }
+%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
+
+@str215 = external global [2 x i8]
 
 define void @t1(%struct.state* %v) {
-	%tmp6 = load i32* null
-	%tmp8 = alloca float, i32 %tmp6
-	store i32 1, i32* null
-	br i1 false, label %bb123.preheader, label %return
-
-bb123.preheader:
-	br i1 false, label %bb43, label %return
-
-bb43:
-	call fastcc void @f1( float* %tmp8, float* null, i32 0 )
-	%tmp70 = load i32* null
-	%tmp85 = getelementptr float* %tmp8, i32 0
-	call fastcc void @f2( float* null, float* null, float* %tmp85, i32 %tmp70 )
-	ret void
-
-return:
-	ret void
+  %tmp6 = load i32* null
+  %tmp8 = alloca float, i32 %tmp6
+  store i32 1, i32* null
+  br i1 false, label %bb123.preheader, label %return
+
+bb123.preheader:                                  ; preds = %0
+  br i1 false, label %bb43, label %return
+
+bb43:                                             ; preds = %bb123.preheader
+  call fastcc void @f1(float* %tmp8, float* null, i32 0)
+  %tmp70 = load i32* null
+  %tmp85 = getelementptr float* %tmp8, i32 0
+  call fastcc void @f2(float* null, float* null, float* %tmp85, i32 %tmp70)
+  ret void
+
+return:                                           ; preds = %bb123.preheader, %0
+  ret void
 }
 
 declare fastcc void @f1(float*, float*, i32)
 
 declare fastcc void @f2(float*, float*, float*, i32)
 
-	%struct.comment = type { i8**, i32*, i32, i8* }
-@str215 = external global [2 x i8]
-
 define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
-	%tmp1 = call i32 @strlen( i8* %tag )
-	%tmp3 = call i32 @strlen( i8* %contents )
-	%tmp4 = add i32 %tmp1, 2
-	%tmp5 = add i32 %tmp4, %tmp3
-	%tmp6 = alloca i8, i32 %tmp5
-	%tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
-	%tmp6.len = call i32 @strlen( i8* %tmp6 )
-	%tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
-	call void @llvm.memcpy.i32( i8* %tmp6.indexed, i8* getelementptr ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1 )
-	%tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
-	call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
-	ret void
+  %tmp1 = call i32 @strlen(i8* %tag)
+  %tmp3 = call i32 @strlen(i8* %contents)
+  %tmp4 = add i32 %tmp1, 2
+  %tmp5 = add i32 %tmp4, %tmp3
+  %tmp6 = alloca i8, i32 %tmp5
+  %tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag)
+  %tmp6.len = call i32 @strlen(i8* %tmp6)
+  %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
+  %tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents)
+  call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6)
+  ret void
 }
 
 declare i32 @strlen(i8*)
@@ -51,6 +51,6 @@ declare i8* @strcat(i8*, i8*)
 
 declare fastcc void @comment_add(%struct.comment*, i8*)
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
 declare i8* @strcpy(i8*, i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 5bae037..30b9f59 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -6,13 +6,14 @@
 ; CHECK: ldrb
 
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+
 @src = external global %struct.x
 @dst = external global %struct.x
 
 define i32 @t() {
 entry:
-	call void @llvm.memcpy.i32( i8* getelementptr (%struct.x* @dst, i32 0, i32 0), i8* getelementptr (%struct.x* @src, i32 0, i32 0), i32 11, i32 8 )
-	ret i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
+  ret i32 0
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 757364b..aeda022 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -9,24 +9,21 @@ entry:
 
         ; CHECK: memmove
         ; EABI: __aeabi_memmove
-        call void @llvm.memmove.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
 
         ; CHECK: memcpy
         ; EABI: __aeabi_memcpy
-        call void @llvm.memcpy.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
 
         ; EABI memset swaps arguments
         ; CHECK: mov r1, #0
         ; CHECK: memset
         ; EABI: mov r2, #0
         ; EABI: __aeabi_memset
-        call void @llvm.memset.i32( i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0 )
+        call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
         unreachable
 }
 
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index fba56b4..f93ffe7 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -6,8 +6,6 @@
 ; RUN: grep .Lbaz: %t
 ; RUN: grep long.*\.Lbaz %t
 
-declare void @foo()
-
 define private void @foo() {
         ret void
 }
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 5739086..ea44c28 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -67,3 +67,60 @@ entry:
   %or = or i32 %shr, %and
   ret i32 %or
 }
+
+; rdar://9609108
+define i32 @test6(i32 %x) nounwind readnone {
+entry:
+; CHECK: test6
+; CHECK: rev16 r0, r0
+  %and = shl i32 %x, 8
+  %shl = and i32 %and, 65280
+  %and2 = lshr i32 %x, 8
+  %shr11 = and i32 %and2, 255
+  %shr5 = and i32 %and2, 16711680
+  %shl9 = and i32 %and, -16777216
+  %or = or i32 %shr5, %shl9
+  %or6 = or i32 %or, %shr11
+  %or10 = or i32 %or6, %shl
+  ret i32 %or10
+}
+
+; rdar://9164521
+define i32 @test7(i32 %a) nounwind readnone {
+entry:
+; CHECK: test7
+; CHECK: rev r0, r0
+; CHECK: lsr r0, r0, #16
+  %and = lshr i32 %a, 8
+  %shr3 = and i32 %and, 255
+  %and2 = shl i32 %a, 8
+  %shl = and i32 %and2, 65280
+  %or = or i32 %shr3, %shl
+  ret i32 %or
+}
+
+define i32 @test8(i32 %a) nounwind readnone {
+entry:
+; CHECK: test8
+; CHECK: revsh r0, r0
+  %and = lshr i32 %a, 8
+  %shr4 = and i32 %and, 255
+  %and2 = shl i32 %a, 8
+  %or = or i32 %shr4, %and2
+  %sext = shl i32 %or, 16
+  %conv3 = ashr exact i32 %sext, 16
+  ret i32 %conv3
+}
+
+define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
+entry:
+; CHECK: test9
+; CHECK: rev r0, r0
+; CHECK: lsr r0, r0, #16
+  %conv = zext i16 %v to i32
+  %shr4 = lshr i32 %conv, 8
+  %shl = shl nuw nsw i32 %conv, 8
+  %or = or i32 %shr4, %shl
+  %conv3 = trunc i32 %or to i16
+  ret i16 %conv3
+}
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
index e4ef9e3..e390cf0 100644
--- a/test/CodeGen/ARM/vargs_align.ll
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -6,7 +6,6 @@ entry:
 	%a_addr = alloca i32		; <i32*> [#uses=1]
 	%retval = alloca i32, align 4		; <i32*> [#uses=2]
 	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %a, i32* %a_addr
 	store i32 0, i32* %tmp
 	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
index 8dcc7f7..08e7d2b 100644
--- a/test/CodeGen/ARM/vqdmul.ll
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -152,7 +152,6 @@ entry:
   ret <2 x i32> %1
 }
 
-declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
index 26076e0..f8d3094 100644
--- a/test/CodeGen/Alpha/private.ll
+++ b/test/CodeGen/Alpha/private.ll
@@ -6,8 +6,6 @@
 ; RUN: grep \\\$baz: %t
 ; RUN: grep ldah.*\\\$baz %t
 
-declare void @foo()
-
 define private void @foo() {
         ret void
 }
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
index e982e43..8dcf3f8 100644
--- a/test/CodeGen/Blackfin/add-overflow.ll
+++ b/test/CodeGen/Blackfin/add-overflow.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
 
-	type { i24, i1 }		; type %0
+	%0 = type { i24, i1 }		; type %0
 
 define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
 entry:
diff --git a/test/CodeGen/Blackfin/many-args.ll b/test/CodeGen/Blackfin/many-args.ll
index 8c52874..2df32ca 100644
--- a/test/CodeGen/Blackfin/many-args.ll
+++ b/test/CodeGen/Blackfin/many-args.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=bfin -verify-machineinstrs
 
-	type { i32, float, float, float, float, float, float, float, float, float, float }		; type %0
+	%0 = type { i32, float, float, float, float, float, float, float, float, float, float }		; type %0
 	%struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
 
 define i32 @main(i32 %argc.1, i8** %argv.1) {
diff --git a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll b/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
deleted file mode 100644
index 8db3167..0000000
--- a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare {i32, i32} @foo()
-
-define i32 @test() {
-  %A = call {i32, i32} @foo()
-  %B = getresult {i32, i32} %A, 0
-  %C = getresult {i32, i32} %A, 1
-  %D = add i32 %B, %C
-  ret i32 %D
-}
-
-define i32 @test2() {
-  %A = call {i32, i32} asm sideeffect "...", "={cx},={di},~{dirflag},~{fpsr},~{flags},~{memory}"()
-  %B = getresult {i32, i32} %A, 0
-  %C = getresult {i32, i32} %A, 1
-  %D = add i32 %B, %C
-  ret i32 %D
-}
diff --git a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll b/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
new file mode 100644
index 0000000..0ae480d
--- /dev/null
+++ b/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=c
+; Check that uadd and sadd with overflow are handled by C Backend.
+
+%0 = type { i32, i1 }        ; type %0
+
+define i1 @func1(i32 zeroext %v1, i32 zeroext %v2) nounwind {
+entry:
+    %t = call %0 @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)     ; <%0> [#uses=1]
+    %obit = extractvalue %0 %t, 1       ; <i1> [#uses=1]
+    br i1 %obit, label %carry, label %normal
+
+normal:     ; preds = %entry
+    ret i1 true
+
+carry:      ; preds = %entry
+    ret i1 false
+}
+
+define i1 @func2(i32 signext %v1, i32 signext %v2) nounwind {
+entry:
+    %t = call %0 @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)     ; <%0> [#uses=1]
+    %obit = extractvalue %0 %t, 1       ; <i1> [#uses=1]
+    br i1 %obit, label %carry, label %normal
+
+normal:     ; preds = %entry
+    ret i1 true
+
+carry:      ; preds = %entry
+    ret i1 false
+}
+
+declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
+
+declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind
+
diff --git a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
index 054a3ca..054a3ca 100644
--- a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
+++ b/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
diff --git a/test/CodeGen/CBackend/X86/dg.exp b/test/CodeGen/CBackend/X86/dg.exp
new file mode 100644
index 0000000..833bcc5
--- /dev/null
+++ b/test/CodeGen/CBackend/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] && [llvm_gcc_supports c] } {
+    RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
+}
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
index 56f72e7..1d933ad 100644
--- a/test/CodeGen/CellSPU/private.ll
+++ b/test/CodeGen/CellSPU/private.ll
@@ -6,9 +6,6 @@
 ; RUN: grep .Lbaz: %t
 ; RUN: grep ila.*\.Lbaz %t
 
-
-declare void @foo()
-
 define private void @foo() {
         ret void
 }
diff --git a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
deleted file mode 100644
index 733202c..0000000
--- a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s
-; Test that llvm.memcpy works with a i64 length operand on all targets.
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-define void @l12_l94_bc_divide_endif_2E_3_2E_ce() {
-newFuncRoot:
-        tail call void @llvm.memcpy.i64( i8* null, i8* null, i64 0, i32 1 )
-        unreachable
-}
-
diff --git a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
index 568b88f..2a2cf6c 100644
--- a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
+++ b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
@@ -1,13 +1,13 @@
 ; RUN: llc < %s 
 ; PR1228
 
-	"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
-	"struct.std::locale" = type { "struct.std::locale::_Impl"* }
-	"struct.std::locale::_Impl" = type { i32, "struct.std::locale::facet"**, i32, "struct.std::locale::facet"**, i8** }
-	"struct.std::locale::facet" = type { i32 (...)**, i32 }
-	"struct.std::string" = type { "struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
+	%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
+	%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
+	%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
+	%"struct.std::locale::facet" = type { i32 (...)**, i32 }
+	%"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
 
-define void @_ZNKSt6locale4nameEv("struct.std::string"* %agg.result) {
+define void @_ZNKSt6locale4nameEv(%"struct.std::string"* %agg.result) {
 entry:
 	%tmp105 = icmp eq i8* null, null		; <i1> [#uses=1]
 	br i1 %tmp105, label %cond_true, label %cond_true222
diff --git a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
deleted file mode 100644
index 3090857..0000000
--- a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
+++ /dev/null
@@ -1,157 +0,0 @@
-; RUN: llc < %s -o -
-
-	%struct.RETURN = type { i32, i32 }
-	%struct.ada__finalization__controlled = type { %struct.system__finalization_root__root_controlled }
-	%struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
-	%struct.ada__strings__unbounded__string_access = type { i8*, %struct.RETURN* }
-	%struct.ada__strings__unbounded__unbounded_string = type { %struct.ada__finalization__controlled, %struct.ada__strings__unbounded__string_access, i32 }
-	%struct.ada__tags__dispatch_table = type { [1 x i32] }
-	%struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* }
-	%struct.system__finalization_root__root_controlled = type { %struct.ada__streams__root_stream_type, %struct.system__finalization_root__root_controlled*, %struct.system__finalization_root__root_controlled* }
-	%struct.system__standard_library__exception_data = type { i8, i8, i32, i32, %struct.system__standard_library__exception_data*, i32, void ()* }
-@C.495.7639 = internal constant %struct.RETURN { i32 1, i32 16 }		; <%struct.RETURN*> [#uses=1]
-@ada__strings__index_error = external global %struct.exception		; <%struct.exception*> [#uses=1]
-@.str5 = internal constant [16 x i8] c"a-strunb.adb:690"		; <[16 x i8]*> [#uses=1]
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @ada__strings__unbounded__realloc_for_chunk(%struct.ada__strings__unbounded__unbounded_string*, i32)
-
-declare void @__gnat_raise_exception(%struct.system__standard_library__exception_data*, i64)
-
-define void @ada__strings__unbounded__insert__2(%struct.ada__strings__unbounded__unbounded_string* %source, i32 %before, i64 %new_item.0.0) {
-entry:
-	%tmp24636 = lshr i64 %new_item.0.0, 32		; <i64> [#uses=1]
-	%tmp24637 = trunc i64 %tmp24636 to i32		; <i32> [#uses=1]
-	%tmp24638 = inttoptr i32 %tmp24637 to %struct.RETURN*		; <%struct.RETURN*> [#uses=2]
-	%tmp25 = getelementptr %struct.RETURN* %tmp24638, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp26 = load i32* %tmp25, align 4		; <i32> [#uses=1]
-	%tmp29 = getelementptr %struct.RETURN* %tmp24638, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp30 = load i32* %tmp29, align 4		; <i32> [#uses=1]
-	%tmp63 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 1, i32 1		; <%struct.RETURN**> [#uses=5]
-	%tmp64 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp65 = getelementptr %struct.RETURN* %tmp64, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp66 = load i32* %tmp65, align 4		; <i32> [#uses=1]
-	%tmp67 = icmp sgt i32 %tmp66, %before		; <i1> [#uses=1]
-	br i1 %tmp67, label %bb77, label %bb
-
-bb:		; preds = %entry
-	%tmp71 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 2		; <i32*> [#uses=4]
-	%tmp72 = load i32* %tmp71, align 4		; <i32> [#uses=1]
-	%tmp73 = add i32 %tmp72, 1		; <i32> [#uses=1]
-	%tmp74 = icmp slt i32 %tmp73, %before		; <i1> [#uses=1]
-	br i1 %tmp74, label %bb77, label %bb84
-
-bb77:		; preds = %bb, %entry
-	tail call void @__gnat_raise_exception( %struct.system__standard_library__exception_data* bitcast (%struct.exception* @ada__strings__index_error to %struct.system__standard_library__exception_data*), i64 or (i64 zext (i32 ptrtoint ([16 x i8]* @.str5 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.RETURN* @C.495.7639 to i32) to i64), i64 32)) )
-	unreachable
-
-bb84:		; preds = %bb
-	%tmp93 = sub i32 %tmp30, %tmp26		; <i32> [#uses=2]
-	%tmp9394 = sext i32 %tmp93 to i36		; <i36> [#uses=1]
-	%tmp95 = shl i36 %tmp9394, 3		; <i36> [#uses=1]
-	%tmp96 = add i36 %tmp95, 8		; <i36> [#uses=2]
-	%tmp97 = icmp sgt i36 %tmp96, -1		; <i1> [#uses=1]
-	%tmp100 = select i1 %tmp97, i36 %tmp96, i36 0		; <i36> [#uses=2]
-	%tmp101 = icmp slt i36 %tmp100, 17179869177		; <i1> [#uses=1]
-	%tmp100.cast = trunc i36 %tmp100 to i32		; <i32> [#uses=1]
-	%min102 = select i1 %tmp101, i32 %tmp100.cast, i32 -8		; <i32> [#uses=1]
-	tail call void @ada__strings__unbounded__realloc_for_chunk( %struct.ada__strings__unbounded__unbounded_string* %source, i32 %min102 )
-	%tmp148 = load i32* %tmp71, align 4		; <i32> [#uses=4]
-	%tmp152 = add i32 %tmp93, 1		; <i32> [#uses=2]
-	%tmp153 = icmp sgt i32 %tmp152, -1		; <i1> [#uses=1]
-	%max154 = select i1 %tmp153, i32 %tmp152, i32 0		; <i32> [#uses=5]
-	%tmp155 = add i32 %tmp148, %max154		; <i32> [#uses=5]
-	%tmp315 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 1, i32 0		; <i8**> [#uses=4]
-	%tmp328 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp329 = getelementptr %struct.RETURN* %tmp328, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp330 = load i32* %tmp329, align 4		; <i32> [#uses=4]
-	%tmp324 = add i32 %max154, %before		; <i32> [#uses=3]
-	%tmp331 = sub i32 %tmp324, %tmp330		; <i32> [#uses=1]
-	%tmp349 = sub i32 %before, %tmp330		; <i32> [#uses=1]
-	%tmp356 = icmp sgt i32 %tmp331, %tmp349		; <i1> [#uses=1]
-	%tmp431 = icmp sgt i32 %tmp324, %tmp155		; <i1> [#uses=2]
-	br i1 %tmp356, label %bb420, label %bb359
-
-bb359:		; preds = %bb84
-	br i1 %tmp431, label %bb481, label %bb382
-
-bb382:		; preds = %bb382, %bb359
-	%indvar = phi i32 [ 0, %bb359 ], [ %indvar.next, %bb382 ]		; <i32> [#uses=2]
-	%max379.pn = phi i32 [ %max154, %bb359 ], [ %L492b.0, %bb382 ]		; <i32> [#uses=1]
-	%before.pn = phi i32 [ %before, %bb359 ], [ 1, %bb382 ]		; <i32> [#uses=1]
-	%L492b.0 = add i32 %before.pn, %max379.pn		; <i32> [#uses=3]
-	%tmp386 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp387 = getelementptr %struct.RETURN* %tmp386, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp388 = load i32* %tmp387, align 4		; <i32> [#uses=2]
-	%tmp392 = load i8** %tmp315, align 4		; <i8*> [#uses=2]
-	%R493b.0 = add i32 %indvar, %before		; <i32> [#uses=1]
-	%tmp405 = sub i32 %R493b.0, %tmp388		; <i32> [#uses=1]
-	%tmp406 = getelementptr i8* %tmp392, i32 %tmp405		; <i8*> [#uses=1]
-	%tmp407 = load i8* %tmp406, align 1		; <i8> [#uses=1]
-	%tmp408 = sub i32 %L492b.0, %tmp388		; <i32> [#uses=1]
-	%tmp409 = getelementptr i8* %tmp392, i32 %tmp408		; <i8*> [#uses=1]
-	store i8 %tmp407, i8* %tmp409, align 1
-	%tmp414 = icmp eq i32 %L492b.0, %tmp155		; <i1> [#uses=1]
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
-	br i1 %tmp414, label %bb481, label %bb382
-
-bb420:		; preds = %bb84
-	br i1 %tmp431, label %bb481, label %bb436.preheader
-
-bb436.preheader:		; preds = %bb420
-	%tmp4468 = load i8** %tmp315, align 4		; <i8*> [#uses=2]
-	%tmp4599 = sub i32 %tmp148, %tmp330		; <i32> [#uses=1]
-	%tmp46010 = getelementptr i8* %tmp4468, i32 %tmp4599		; <i8*> [#uses=1]
-	%tmp46111 = load i8* %tmp46010, align 1		; <i8> [#uses=1]
-	%tmp46212 = sub i32 %tmp155, %tmp330		; <i32> [#uses=1]
-	%tmp46313 = getelementptr i8* %tmp4468, i32 %tmp46212		; <i8*> [#uses=1]
-	store i8 %tmp46111, i8* %tmp46313, align 1
-	%exitcond14 = icmp eq i32 %tmp155, %tmp324		; <i1> [#uses=1]
-	br i1 %exitcond14, label %bb481, label %bb.nph
-
-bb.nph:		; preds = %bb436.preheader
-	%tmp5 = sub i32 %tmp148, %before		; <i32> [#uses=1]
-	br label %bb478
-
-bb478:		; preds = %bb478, %bb.nph
-	%indvar6422 = phi i32 [ 0, %bb.nph ], [ %indvar.next643, %bb478 ]		; <i32> [#uses=1]
-	%indvar.next643 = add i32 %indvar6422, 1		; <i32> [#uses=4]
-	%L490b.0 = sub i32 %tmp155, %indvar.next643		; <i32> [#uses=1]
-	%R491b.0 = sub i32 %tmp148, %indvar.next643		; <i32> [#uses=1]
-	%tmp440 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp441 = getelementptr %struct.RETURN* %tmp440, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp442 = load i32* %tmp441, align 4		; <i32> [#uses=2]
-	%tmp446 = load i8** %tmp315, align 4		; <i8*> [#uses=2]
-	%tmp459 = sub i32 %R491b.0, %tmp442		; <i32> [#uses=1]
-	%tmp460 = getelementptr i8* %tmp446, i32 %tmp459		; <i8*> [#uses=1]
-	%tmp461 = load i8* %tmp460, align 1		; <i8> [#uses=1]
-	%tmp462 = sub i32 %L490b.0, %tmp442		; <i32> [#uses=1]
-	%tmp463 = getelementptr i8* %tmp446, i32 %tmp462		; <i8*> [#uses=1]
-	store i8 %tmp461, i8* %tmp463, align 1
-	%exitcond = icmp eq i32 %indvar.next643, %tmp5		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb481, label %bb478
-
-bb481:		; preds = %bb478, %bb436.preheader, %bb420, %bb382, %bb359
-	%tmp577 = add i32 %before, -1		; <i32> [#uses=3]
-	%tmp578 = add i32 %max154, %tmp577		; <i32> [#uses=2]
-	%tmp581 = icmp sge i32 %tmp578, %tmp577		; <i1> [#uses=1]
-	%max582 = select i1 %tmp581, i32 %tmp578, i32 %tmp577		; <i32> [#uses=1]
-	%tmp584 = sub i32 %max582, %before		; <i32> [#uses=1]
-	%tmp585 = add i32 %tmp584, 1		; <i32> [#uses=2]
-	%tmp586 = icmp sgt i32 %tmp585, -1		; <i1> [#uses=1]
-	%max587 = select i1 %tmp586, i32 %tmp585, i32 0		; <i32> [#uses=1]
-	%tmp591 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp592 = getelementptr %struct.RETURN* %tmp591, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp593 = load i32* %tmp592, align 4		; <i32> [#uses=1]
-	%tmp597 = load i8** %tmp315, align 4		; <i8*> [#uses=1]
-	%tmp600621 = trunc i64 %new_item.0.0 to i32		; <i32> [#uses=1]
-	%tmp600622 = inttoptr i32 %tmp600621 to i8*		; <i8*> [#uses=1]
-	%tmp601 = sub i32 %before, %tmp593		; <i32> [#uses=1]
-	%tmp602 = getelementptr i8* %tmp597, i32 %tmp601		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32( i8* %tmp602, i8* %tmp600622, i32 %max587, i32 1 )
-	%tmp606 = load i32* %tmp71, align 4		; <i32> [#uses=1]
-	%tmp613 = add i32 %tmp606, %max154		; <i32> [#uses=1]
-	store i32 %tmp613, i32* %tmp71, align 4
-	ret void
-}
diff --git a/test/CodeGen/Generic/getresult-undef.ll b/test/CodeGen/Generic/getresult-undef.ll
deleted file mode 100644
index c675535..0000000
--- a/test/CodeGen/Generic/getresult-undef.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s
-
-define double @foo() {
-  %t = getresult {double, double} undef, 1
-  ret double %t
-}
diff --git a/test/CodeGen/Generic/invalid-memcpy.ll b/test/CodeGen/Generic/invalid-memcpy.ll
index 8448565..2dfa28b 100644
--- a/test/CodeGen/Generic/invalid-memcpy.ll
+++ b/test/CodeGen/Generic/invalid-memcpy.ll
@@ -4,16 +4,14 @@
 ; greater than the alignment guaranteed for Qux or C.0.1173), but it
 ; should compile, not crash the code generator.
 
-@C.0.1173 = external constant [33 x i8]         ; <[33 x i8]*> [#uses=1]
+@C.0.1173 = external constant [33 x i8]
 
 define void @Bork() {
 entry:
-        %Qux = alloca [33 x i8]         ; <[33 x i8]*> [#uses=1]
-        %Qux1 = bitcast [33 x i8]* %Qux to i8*          ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i64( i8* %Qux1, i8* getelementptr ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8 )
-        ret void
+  %Qux = alloca [33 x i8]
+  %Qux1 = bitcast [33 x i8]* %Qux to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Qux1, i8* getelementptr inbounds ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8, i1 false)
+  ret void
 }
 
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
index 282e973..a3cab5d 100644
--- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
+++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -6,9 +6,9 @@ define void @foo(i64* %p, double* %q) nounwind {
         %t = invoke { i64, double } @wild() to label %normal unwind label %handler
 
 normal:
-        %mrv_gr = getresult { i64, double } %t, 0
+        %mrv_gr = extractvalue { i64, double } %t, 0
         store i64 %mrv_gr, i64* %p
-        %mrv_gr12681 = getresult { i64, double } %t, 1   
+        %mrv_gr12681 = extractvalue { i64, double } %t, 1   
         store double %mrv_gr12681, double* %q
 	ret void
   
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
index 50eeecf..ff503ec 100644
--- a/test/CodeGen/Mips/alloca.ll
+++ b/test/CodeGen/Mips/alloca.ll
@@ -4,15 +4,15 @@ define i32 @twoalloca(i32 %size) nounwind {
 entry:
 ; CHECK: subu  $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
 ; CHECK: addu  $sp, $zero, $[[T0]]
-; CHECK: addu  $[[SP1:[0-9]+]], $zero, $sp
-; CHECK: subu  $[[T1:[0-9]+]], $sp, $[[SZ]]
-; CHECK: addu  $sp, $zero, $[[T1]]
-; CHECK: addu  $[[SP2:[0-9]+]], $zero, $sp
+; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF:[0-9]+]]
+; CHECK: subu  $[[T2:[0-9]+]], $sp, $[[SZ]]
+; CHECK: addu  $sp, $zero, $[[T2]]
+; CHECK: addiu $[[T3:[0-9]+]], $sp, [[OFF]]
 ; CHECK: lw  $25, %call16(foo)($gp)
-; CHECK: addiu $4, $[[SP1]], 24
+; CHECK: addu $4, $zero, $[[T1]]
 ; CHECK: jalr  $25
 ; CHECK: lw  $25, %call16(foo)($gp)
-; CHECK: addiu $4, $[[SP2]], 24
+; CHECK: addu $4, $zero, $[[T3]]
 ; CHECK: jalr  $25
   %tmp1 = alloca i8, i32 %size, align 4
   %add.ptr = getelementptr inbounds i8* %tmp1, i32 5
@@ -29,3 +29,72 @@ declare void @foo2(double, double, i32)
 
 declare i32 @foo(i8*)
 
+@.str = private unnamed_addr constant [22 x i8] c"%d %d %d %d %d %d %d\0A\00", align 1
+
+define i32 @alloca2(i32 %size) nounwind {
+entry:
+; dynamic allocated stack area and $gp restore slot have the same offsets
+; relative to $sp.
+;
+; CHECK: alloca2
+; CHECK: .cprestore [[OFF:[0-9]+]]
+; CHECK: subu  $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
+; CHECK: addu  $sp, $zero, $[[T0]]
+; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF]]
+
+  %tmp1 = alloca i8, i32 %size, align 4
+  %0 = bitcast i8* %tmp1 to i32*
+  %cmp = icmp sgt i32 %size, 10
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+; CHECK: addiu $4, $[[T1]], 40
+
+  %add.ptr = getelementptr inbounds i8* %tmp1, i32 40
+  %1 = bitcast i8* %add.ptr to i32*
+  call void @foo3(i32* %1) nounwind
+  %arrayidx15.pre = getelementptr inbounds i8* %tmp1, i32 12
+  %.pre = bitcast i8* %arrayidx15.pre to i32*
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+; CHECK: addiu $4, $[[T1]], 12
+
+  %add.ptr5 = getelementptr inbounds i8* %tmp1, i32 12
+  %2 = bitcast i8* %add.ptr5 to i32*
+  call void @foo3(i32* %2) nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+; CHECK: lw  $5, 0($[[T1]])
+; CHECK: lw  $25, %call16(printf)
+
+  %.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ]
+  %tmp7 = load i32* %0, align 4, !tbaa !0
+  %arrayidx9 = getelementptr inbounds i8* %tmp1, i32 4
+  %3 = bitcast i8* %arrayidx9 to i32*
+  %tmp10 = load i32* %3, align 4, !tbaa !0
+  %arrayidx12 = getelementptr inbounds i8* %tmp1, i32 8
+  %4 = bitcast i8* %arrayidx12 to i32*
+  %tmp13 = load i32* %4, align 4, !tbaa !0
+  %tmp16 = load i32* %.pre-phi, align 4, !tbaa !0
+  %arrayidx18 = getelementptr inbounds i8* %tmp1, i32 16
+  %5 = bitcast i8* %arrayidx18 to i32*
+  %tmp19 = load i32* %5, align 4, !tbaa !0
+  %arrayidx21 = getelementptr inbounds i8* %tmp1, i32 20
+  %6 = bitcast i8* %arrayidx21 to i32*
+  %tmp22 = load i32* %6, align 4, !tbaa !0
+  %arrayidx24 = getelementptr inbounds i8* %tmp1, i32 24
+  %7 = bitcast i8* %arrayidx24 to i32*
+  %tmp25 = load i32* %7, align 4, !tbaa !0
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) nounwind
+  ret i32 0
+}
+
+declare void @foo3(i32*)
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
new file mode 100644
index 0000000..c565892
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+
+@g1 = external global i32
+
+define i32 @f1(i32 %x) nounwind {
+entry:
+; CHECK: addiu $[[T0:[0-9]+]], $sp
+; CHECK: #APP
+; CHECK: sw $4, 0($[[T0]])
+; CHECK: #NO_APP
+; CHECK: lw  $[[T1:[0-9]+]], %got(g1)($gp)
+; CHECK: #APP
+; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]])
+; CHECK: #NO_APP
+; CHECK: sw  $[[T3]], 0($[[T1]])
+
+  %l1 = alloca i32, align 4
+  call void asm "sw $1, $0", "=*m,r"(i32* %l1, i32 %x) nounwind
+  %0 = call i32 asm "lw $0, $1", "=r,*m"(i32* %l1) nounwind
+  store i32 %0, i32* @g1, align 4
+  ret i32 %0
+}
+
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 34b7547..4cc48f0 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -6,8 +6,6 @@
 ; RUN: grep \\\$baz: %t
 ; RUN: grep lw.*\\\$baz %t
 
-declare void @foo()
-
 define private void @foo() {
         ret void
 }
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll
index b89a2f6..c16be49 100644
--- a/test/CodeGen/PTX/add.ll
+++ b/test/CodeGen/PTX/add.ll
@@ -22,14 +22,14 @@ define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
 }
 
 define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: add.f32 r0, r1, r2
+; CHECK: add.rn.f32 r0, r1, r2
 ; CHECK-NEXT: ret;
   %z = fadd float %x, %y
   ret float %z
 }
 
 define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: add.f64 rd0, rd1, rd2
+; CHECK: add.rn.f64 rd0, rd1, rd2
 ; CHECK-NEXT: ret;
   %z = fadd double %x, %y
   ret double %z
@@ -57,14 +57,14 @@ define ptx_device i64 @t2_u64(i64 %x) {
 }
 
 define ptx_device float @t2_f32(float %x) {
-; CHECK: add.f32 r0, r1, 0F3F800000;
+; CHECK: add.rn.f32 r0, r1, 0F3F800000;
 ; CHECK-NEXT: ret;
   %z = fadd float %x, 1.0
   ret float %z
 }
 
 define ptx_device double @t2_f64(double %x) {
-; CHECK: add.f64 rd0, rd1, 0D3FF0000000000000;
+; CHECK: add.rn.f64 rd0, rd1, 0D3FF0000000000000;
 ; CHECK-NEXT: ret;
   %z = fadd double %x, 1.0
   ret double %z
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
index 984cb4d..f723369 100644
--- a/test/CodeGen/PTX/cvt.ll
+++ b/test/CodeGen/PTX/cvt.ll
@@ -4,8 +4,10 @@
 ; (note: we convert back to i32 to return)
 
 define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
-; CHECK: cvt.pred.u16 p0, rh1;
-; CHECK: ret;
+; CHECK: setp.gt.b16 p0, rh1, 0
+; CHECK-NEXT: and.pred p0, p0, p1;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
 	%a = trunc i16 %x to i1
 	%b = and i1 %a, %y
 	%c = zext i1 %b to i32
@@ -13,8 +15,10 @@ define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
 }
 
 define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
-; CHECK: cvt.pred.u32 p0, r1;
-; CHECK: ret;
+; CHECK: setp.gt.b32 p0, r1, 0
+; CHECK-NEXT: and.pred p0, p0, p1;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
 	%a = trunc i32 %x to i1
 	%b = and i1 %a, %y
 	%c = zext i1 %b to i32
@@ -22,8 +26,10 @@ define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
 }
 
 define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
-; CHECK: cvt.pred.u64 p0, rd1;
-; CHECK: ret;
+; CHECK: setp.gt.b64 p0, rd1, 0
+; CHECK-NEXT: and.pred p0, p0, p1;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
 	%a = trunc i64 %x to i1
 	%b = and i1 %a, %y
 	%c = zext i1 %b to i32
@@ -31,8 +37,10 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
 }
 
 define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
-; CHECK: cvt.rni.pred.f32 p0, r1;
-; CHECK: ret;
+; CHECK: setp.gt.b32 p0, r1, 0
+; CHECK-NEXT: and.pred p0, p0, p1;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
 	%a = fptoui float %x to i1
 	%b = and i1 %a, %y
 	%c = zext i1 %b to i32
@@ -40,8 +48,10 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
 }
 
 define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
-; CHECK: cvt.rni.pred.f64 p0, rd1;
-; CHECK: ret;
+; CHECK: setp.gt.b64 p0, rd1, 0
+; CHECK-NEXT: and.pred p0, p0, p1;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
 	%a = fptoui double %x to i1
 	%b = and i1 %a, %y
 	%c = zext i1 %b to i32
@@ -51,36 +61,36 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
 ; i16
 
 define ptx_device i16 @cvt_i16_preds(i1 %x) {
-; CHECK: cvt.u16.pred rh0, p1;
-; CHECK: ret;
+; CHECK: selp.u16 rh0, 1, 0, p1;
+; CHECK-NEXT: ret;
 	%a = zext i1 %x to i16
 	ret i16 %a
 }
 
 define ptx_device i16 @cvt_i16_i32(i32 %x) {
 ; CHECK: cvt.u16.u32 rh0, r1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = trunc i32 %x to i16
 	ret i16 %a
 }
 
 define ptx_device i16 @cvt_i16_i64(i64 %x) {
 ; CHECK: cvt.u16.u64 rh0, rd1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = trunc i64 %x to i16
 	ret i16 %a
 }
 
 define ptx_device i16 @cvt_i16_f32(float %x) {
-; CHECK: cvt.rni.u16.f32 rh0, r1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u16.f32 rh0, r1;
+; CHECK-NEXT: ret;
 	%a = fptoui float %x to i16
 	ret i16 %a
 }
 
 define ptx_device i16 @cvt_i16_f64(double %x) {
-; CHECK: cvt.rni.u16.f64 rh0, rd1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u16.f64 rh0, rd1;
+; CHECK-NEXT: ret;
 	%a = fptoui double %x to i16
 	ret i16 %a
 }
@@ -88,36 +98,36 @@ define ptx_device i16 @cvt_i16_f64(double %x) {
 ; i32
 
 define ptx_device i32 @cvt_i32_preds(i1 %x) {
-; CHECK: cvt.u32.pred r0, p1;
-; CHECK: ret;
+; CHECK: selp.u32 r0, 1, 0, p1;
+; CHECK-NEXT: ret;
 	%a = zext i1 %x to i32
 	ret i32 %a
 }
 
 define ptx_device i32 @cvt_i32_i16(i16 %x) {
 ; CHECK: cvt.u32.u16 r0, rh1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = zext i16 %x to i32
 	ret i32 %a
 }
 
 define ptx_device i32 @cvt_i32_i64(i64 %x) {
 ; CHECK: cvt.u32.u64 r0, rd1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = trunc i64 %x to i32
 	ret i32 %a
 }
 
 define ptx_device i32 @cvt_i32_f32(float %x) {
-; CHECK: cvt.rni.u32.f32 r0, r1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u32.f32 r0, r1;
+; CHECK-NEXT: ret;
 	%a = fptoui float %x to i32
 	ret i32 %a
 }
 
 define ptx_device i32 @cvt_i32_f64(double %x) {
-; CHECK: cvt.rni.u32.f64 r0, rd1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u32.f64 r0, rd1;
+; CHECK-NEXT: ret;
 	%a = fptoui double %x to i32
 	ret i32 %a
 }
@@ -125,35 +135,35 @@ define ptx_device i32 @cvt_i32_f64(double %x) {
 ; i64
 
 define ptx_device i64 @cvt_i64_preds(i1 %x) {
-; CHECK: cvt.u64.pred rd0, p1;
-; CHECK: ret;
+; CHECK: selp.u64 rd0, 1, 0, p1;
+; CHECK-NEXT: ret;
 	%a = zext i1 %x to i64
 	ret i64 %a
 }
 
 define ptx_device i64 @cvt_i64_i16(i16 %x) {
 ; CHECK: cvt.u64.u16 rd0, rh1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = zext i16 %x to i64
 	ret i64 %a
 }
 
 define ptx_device i64 @cvt_i64_i32(i32 %x) {
 ; CHECK: cvt.u64.u32 rd0, r1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = zext i32 %x to i64
 	ret i64 %a
 }
 
 define ptx_device i64 @cvt_i64_f32(float %x) {
-; CHECK: cvt.rni.u64.f32 rd0, r1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u64.f32 rd0, r1;
+; CHECK-NEXT: ret;
 	%a = fptoui float %x to i64
 	ret i64 %a
 }
 
 define ptx_device i64 @cvt_i64_f64(double %x) {
-; CHECK: cvt.rni.u64.f64 rd0, rd1;
+; CHECK: cvt.rzi.u64.f64 rd0, rd1;
 ; CHECK: ret;
 	%a = fptoui double %x to i64
 	ret i64 %a
@@ -162,36 +172,36 @@ define ptx_device i64 @cvt_i64_f64(double %x) {
 ; f32
 
 define ptx_device float @cvt_f32_preds(i1 %x) {
-; CHECK: cvt.rn.f32.pred r0, p1;
-; CHECK: ret;
+; CHECK: selp.f32 r0, 0F3F800000, 0F00000000, p1;
+; CHECK-NEXT: ret;
 	%a = uitofp i1 %x to float
 	ret float %a
 }
 
 define ptx_device float @cvt_f32_i16(i16 %x) {
 ; CHECK: cvt.rn.f32.u16 r0, rh1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = uitofp i16 %x to float
 	ret float %a
 }
 
 define ptx_device float @cvt_f32_i32(i32 %x) {
 ; CHECK: cvt.rn.f32.u32 r0, r1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = uitofp i32 %x to float
 	ret float %a
 }
 
 define ptx_device float @cvt_f32_i64(i64 %x) {
 ; CHECK: cvt.rn.f32.u64 r0, rd1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = uitofp i64 %x to float
 	ret float %a
 }
 
 define ptx_device float @cvt_f32_f64(double %x) {
 ; CHECK: cvt.rn.f32.f64 r0, rd1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = fptrunc double %x to float
 	ret float %a
 }
@@ -199,36 +209,36 @@ define ptx_device float @cvt_f32_f64(double %x) {
 ; f64
 
 define ptx_device double @cvt_f64_preds(i1 %x) {
-; CHECK: cvt.rn.f64.pred rd0, p1;
-; CHECK: ret;
+; CHECK: selp.f64 rd0, 0D3F80000000000000, 0D0000000000000000, p1;
+; CHECK-NEXT: ret;
 	%a = uitofp i1 %x to double
 	ret double %a
 }
 
 define ptx_device double @cvt_f64_i16(i16 %x) {
 ; CHECK: cvt.rn.f64.u16 rd0, rh1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = uitofp i16 %x to double
 	ret double %a
 }
 
 define ptx_device double @cvt_f64_i32(i32 %x) {
 ; CHECK: cvt.rn.f64.u32 rd0, r1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = uitofp i32 %x to double
 	ret double %a
 }
 
 define ptx_device double @cvt_f64_i64(i64 %x) {
 ; CHECK: cvt.rn.f64.u64 rd0, rd1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = uitofp i64 %x to double
 	ret double %a
 }
 
 define ptx_device double @cvt_f64_f32(float %x) {
 ; CHECK: cvt.f64.f32 rd0, r1;
-; CHECK: ret;
+; CHECK-NEXT: ret;
 	%a = fpext float %x to double
 	ret double %a
 }
diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll
index 9aff251..eb32222 100644
--- a/test/CodeGen/PTX/fdiv-sm10.ll
+++ b/test/CodeGen/PTX/fdiv-sm10.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
 
 define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: div.approx.f32 r0, r1, r2;
+; CHECK: div.f32 r0, r1, r2;
 ; CHECK-NEXT: ret;
 	%a = fdiv float %x, %y
 	ret float %a
diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll
index 84e0ada..ad24f35 100644
--- a/test/CodeGen/PTX/fdiv-sm13.ll
+++ b/test/CodeGen/PTX/fdiv-sm13.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
 
 define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: div.approx.f32 r0, r1, r2;
+; CHECK: div.rn.f32 r0, r1, r2;
 ; CHECK-NEXT: ret;
 	%a = fdiv float %x, %y
 	ret float %a
diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll
index 93f94e3..2093556 100644
--- a/test/CodeGen/PTX/mul.ll
+++ b/test/CodeGen/PTX/mul.ll
@@ -11,28 +11,28 @@
 ;}
 
 define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: mul.f32 r0, r1, r2
+; CHECK: mul.rn.f32 r0, r1, r2
 ; CHECK-NEXT: ret;
   %z = fmul float %x, %y
   ret float %z
 }
 
 define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: mul.f64 rd0, rd1, rd2
+; CHECK: mul.rn.f64 rd0, rd1, rd2
 ; CHECK-NEXT: ret;
   %z = fmul double %x, %y
   ret double %z
 }
 
 define ptx_device float @t2_f32(float %x) {
-; CHECK: mul.f32 r0, r1, 0F40A00000;
+; CHECK: mul.rn.f32 r0, r1, 0F40A00000;
 ; CHECK-NEXT: ret;
   %z = fmul float %x, 5.0
   ret float %z
 }
 
 define ptx_device double @t2_f64(double %x) {
-; CHECK: mul.f64 rd0, rd1, 0D4014000000000000;
+; CHECK: mul.rn.f64 rd0, rd1, 0D4014000000000000;
 ; CHECK-NEXT: ret;
   %z = fmul double %x, 5.0
   ret double %z
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
index 92effa6..0fb6602 100644
--- a/test/CodeGen/PTX/options.ll
+++ b/test/CodeGen/PTX/options.ll
@@ -5,6 +5,8 @@
 ; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
 ; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
 ; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
+; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".address_size 32"
+; RUN: llc < %s -march=ptx64 -mattr=ptx23 | grep ".address_size 64"
 
 define ptx_device void @t1() {
 	ret void
diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll
index 5836122..3e01a75 100644
--- a/test/CodeGen/PTX/setp.ll
+++ b/test/CodeGen/PTX/setp.ll
@@ -2,7 +2,7 @@
 
 define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
 ; CHECK: setp.eq.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp eq i32 %x, %y
 	%z = zext i1 %p to i32
@@ -11,7 +11,7 @@ define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
 
 define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
 ; CHECK: setp.ne.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp ne i32 %x, %y
 	%z = zext i1 %p to i32
@@ -20,7 +20,7 @@ define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
 
 define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
 ; CHECK: setp.lt.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp ult i32 %x, %y
 	%z = zext i1 %p to i32
@@ -29,7 +29,7 @@ define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
 
 define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
 ; CHECK: setp.le.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp ule i32 %x, %y
 	%z = zext i1 %p to i32
@@ -38,7 +38,7 @@ define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
 
 define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
 ; CHECK: setp.gt.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp ugt i32 %x, %y
 	%z = zext i1 %p to i32
@@ -47,16 +47,52 @@ define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
 
 define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
 ; CHECK: setp.ge.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp uge i32 %x, %y
 	%z = zext i1 %p to i32
 	ret i32 %z
 }
 
+define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
+; CHECK: setp.lt.s32 p0, r1, r2;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp slt i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
+; CHECK: setp.le.s32 p0, r1, r2;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp sle i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
+; CHECK: setp.gt.s32 p0, r1, r2;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp sgt i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
+; CHECK: setp.ge.s32 p0, r1, r2;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp sge i32 %x, %y
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
 define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
 ; CHECK: setp.eq.u32 p0, r1, 1;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp eq i32 %x, 1
 	%z = zext i1 %p to i32
@@ -65,7 +101,7 @@ define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
 
 define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
 ; CHECK: setp.ne.u32 p0, r1, 1;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp ne i32 %x, 1
 	%z = zext i1 %p to i32
@@ -74,7 +110,7 @@ define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
 
 define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
 ; CHECK: setp.eq.u32 p0, r1, 0;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp ult i32 %x, 1
 	%z = zext i1 %p to i32
@@ -83,7 +119,7 @@ define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
 
 define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
 ; CHECK: setp.lt.u32 p0, r1, 2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp ule i32 %x, 1
 	%z = zext i1 %p to i32
@@ -92,7 +128,7 @@ define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
 
 define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
 ; CHECK: setp.gt.u32 p0, r1, 1;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp ugt i32 %x, 1
 	%z = zext i1 %p to i32
@@ -101,17 +137,53 @@ define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
 
 define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
 ; CHECK: setp.ne.u32 p0, r1, 0;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%p = icmp uge i32 %x, 1
 	%z = zext i1 %p to i32
 	ret i32 %z
 }
 
+define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
+; CHECK: setp.lt.s32 p0, r1, 1;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp slt i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
+; CHECK: setp.lt.s32 p0, r1, 2;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp sle i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
+; CHECK: setp.gt.s32 p0, r1, 1;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp sgt i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
+define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
+; CHECK: setp.gt.s32 p0, r1, 0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
+; CHECK-NEXT: ret;
+	%p = icmp sge i32 %x, 1
+	%z = zext i1 %p to i32
+	ret i32 %z
+}
+
 define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
 ; CHECK: setp.gt.u32 p0, r3, r4;
 ; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%c = icmp eq i32 %x, %y
 	%d = icmp ugt i32 %u, %v
@@ -121,9 +193,9 @@ define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
 }
 
 define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
-; CHECK: cvt.pred.u32 p0, r3;
+; CHECK: setp.gt.b32 p0, r3, 0;
 ; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
 ; CHECK-NEXT: ret;
 	%c = trunc i32 %w to i1
 	%d = icmp eq i32 %x, %y
diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll
index 9efeaac..4d55280 100644
--- a/test/CodeGen/PTX/sub.ll
+++ b/test/CodeGen/PTX/sub.ll
@@ -22,14 +22,14 @@ define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
 }
 
 define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: sub.f32 r0, r1, r2
+; CHECK: sub.rn.f32 r0, r1, r2
 ; CHECK-NEXT: ret;
   %z = fsub float %x, %y
   ret float %z
 }
 
 define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: sub.f64 rd0, rd1, rd2
+; CHECK: sub.rn.f64 rd0, rd1, rd2
 ; CHECK-NEXT: ret;
   %z = fsub double %x, %y
   ret double %z
@@ -57,14 +57,14 @@ define ptx_device i64 @t2_u64(i64 %x) {
 }
 
 define ptx_device float @t2_f32(float %x) {
-; CHECK: add.f32 r0, r1, 0FBF800000;
+; CHECK: add.rn.f32 r0, r1, 0FBF800000;
 ; CHECK-NEXT: ret;
   %z = fsub float %x, 1.0
   ret float %z
 }
 
 define ptx_device double @t2_f64(double %x) {
-; CHECK: add.f64 rd0, rd1, 0DBFF0000000000000;
+; CHECK: add.rn.f64 rd0, rd1, 0DBFF0000000000000;
 ; CHECK-NEXT: ret;
   %z = fsub double %x, 1.0
   ret double %z
diff --git a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
index 7b00ac6..c779288 100644
--- a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
+++ b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -7,7 +7,6 @@ entry:
 	%temp = alloca i32, align 4		; <i32*> [#uses=2]
 	%ctz_x = alloca i32, align 4		; <i32*> [#uses=3]
 	%ctz_c = alloca i32, align 4		; <i32*> [#uses=2]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 61440, i32* %ctz_x
 	%tmp = load i32* %ctz_x		; <i32> [#uses=1]
 	%tmp1 = sub i32 0, %tmp		; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index 42f2152..c141551 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -13,7 +13,6 @@ entry:
 	%i_addr = alloca i32		; <i32*> [#uses=2]
 	%q_addr = alloca i32		; <i32*> [#uses=2]
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %i, i32* %i_addr
 	store i32 %q, i32* %q_addr
 	%tmp = load i32* %i_addr		; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
deleted file mode 100644
index 83f3f6f..0000000
--- a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=ppc32
-; PR2986
-@argc = external global i32		; <i32*> [#uses=1]
-@buffer = external global [32 x i8], align 4		; <[32 x i8]*> [#uses=1]
-
-define void @test1() nounwind noinline {
-entry:
-	%0 = load i32* @argc, align 4		; <i32> [#uses=1]
-	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
-	tail call void @llvm.memset.i32(i8* getelementptr ([32 x i8]* @buffer, i32 0, i32 0), i8 %1, i32 17, i32 4)
-	unreachable
-}
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll b/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
deleted file mode 100644
index 54f4b2e..0000000
--- a/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
+++ /dev/null
@@ -1,155 +0,0 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin8
-
-%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-%struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
-%struct.__sFILEX = type opaque
-%struct.__sbuf = type { i8*, i32 }
-%struct.gcov_ctr_info = type { i32, i64*, void (i64*, i32)* }
-%struct.gcov_ctr_summary = type { i32, i32, i64, i64, i64 }
-%struct.gcov_fn_info = type { i32, i32, [0 x i32] }
-%struct.gcov_info = type { i32, %struct.gcov_info*, i32, i8*, i32, %struct.gcov_fn_info*, i32, [0 x %struct.gcov_ctr_info] }
-%struct.gcov_summary = type { i32, [1 x %struct.gcov_ctr_summary] }
-
-@__gcov_var = external global %struct.__gcov_var  ; <%struct.__gcov_var*> [#uses=1]
-@__sF = external global [0 x %struct.FILE]        ; <[0 x %struct.FILE]*> [#uses=1]
-@.str = external constant [56 x i8], align 4      ; <[56 x i8]*> [#uses=1]
-@gcov_list = external global %struct.gcov_info*   ; <%struct.gcov_info**> [#uses=1]
-@.str7 = external constant [35 x i8], align 4     ; <[35 x i8]*> [#uses=1]
-@.str8 = external constant [9 x i8], align 4      ; <[9 x i8]*> [#uses=1]
-@.str9 = external constant [10 x i8], align 4     ; <[10 x i8]*> [#uses=1]
-@.str10 = external constant [36 x i8], align 4    ; <[36 x i8]*> [#uses=1]
-
-declare i32 @"\01_fprintf$LDBL128"(%struct.FILE*, i8*, ...) nounwind
-
-define void @gcov_exit() nounwind {
-entry:
-  %gi_ptr.0357 = load %struct.gcov_info** @gcov_list, align 4 ; <%struct.gcov_info*> [#uses=1]
-  %0 = alloca i8, i32 undef, align 1              ; <i8*> [#uses=3]
-  br i1 undef, label %return, label %bb.nph341
-
-bb.nph341:                                        ; preds = %entry
-  %object27 = bitcast %struct.gcov_summary* undef to i8* ; <i8*> [#uses=1]
-  br label %bb25
-
-bb25:                                             ; preds = %read_fatal, %bb.nph341
-  %gi_ptr.1329 = phi %struct.gcov_info* [ %gi_ptr.0357, %bb.nph341 ], [ undef, %read_fatal ] ; <%struct.gcov_info*> [#uses=1]
-  call void @llvm.memset.i32(i8* %object27, i8 0, i32 36, i32 8)
-  br i1 undef, label %bb49.1, label %bb48
-
-bb48:                                             ; preds = %bb25
-  br label %bb49.1
-
-bb51:                                             ; preds = %bb48.4, %bb49.3
-  switch i32 undef, label %bb58 [
-    i32 0, label %rewrite
-    i32 1734567009, label %bb59
-  ]
-
-bb58:                                             ; preds = %bb51
-  %1 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([35 x i8]* @.str7, i32 0, i32 0), i8* %0) nounwind ; <i32> [#uses=0]
-  br label %read_fatal
-
-bb59:                                             ; preds = %bb51
-  br i1 undef, label %bb60, label %bb3.i156
-
-bb3.i156:                                         ; preds = %bb59
-  store i8 52, i8* undef, align 1
-  store i8 42, i8* undef, align 1
-  %2 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([56 x i8]* @.str, i32 0, i32 0), i8* %0, i8* undef, i8* undef) nounwind ; <i32> [#uses=0]
-  br label %read_fatal
-
-bb60:                                             ; preds = %bb59
-  br i1 undef, label %bb78.preheader, label %rewrite
-
-bb78.preheader:                                   ; preds = %bb60
-  br i1 undef, label %bb62, label %bb80
-
-bb62:                                             ; preds = %bb78.preheader
-  br i1 undef, label %bb64, label %read_mismatch
-
-bb64:                                             ; preds = %bb62
-  br i1 undef, label %bb65, label %read_mismatch
-
-bb65:                                             ; preds = %bb64
-  br i1 undef, label %bb75, label %read_mismatch
-
-read_mismatch:                                    ; preds = %bb98, %bb119.preheader, %bb72, %bb71, %bb65, %bb64, %bb62
-  %3 = icmp eq i32 undef, -1                      ; <i1> [#uses=1]
-  %iftmp.11.0 = select i1 %3, i8* getelementptr inbounds ([10 x i8]* @.str9, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str8, i32 0, i32 0) ; <i8*> [#uses=1]
-  %4 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([36 x i8]* @.str10, i32 0, i32 0), i8* %0, i8* %iftmp.11.0) nounwind ; <i32> [#uses=0]
-  br label %read_fatal
-
-bb71:                                             ; preds = %bb75
-  %5 = load i32* undef, align 4                   ; <i32> [#uses=1]
-  %6 = getelementptr inbounds %struct.gcov_info* %gi_ptr.1329, i32 0, i32 7, i32 undef, i32 2 ; <void (i64*, i32)**> [#uses=1]
-  %7 = load void (i64*, i32)** %6, align 4        ; <void (i64*, i32)*> [#uses=1]
-  %8 = call i32 @__gcov_read_unsigned() nounwind  ; <i32> [#uses=1]
-  %9 = call i32 @__gcov_read_unsigned() nounwind  ; <i32> [#uses=1]
-  %10 = icmp eq i32 %tmp386, %8                   ; <i1> [#uses=1]
-  br i1 %10, label %bb72, label %read_mismatch
-
-bb72:                                             ; preds = %bb71
-  %11 = icmp eq i32 undef, %9                     ; <i1> [#uses=1]
-  br i1 %11, label %bb73, label %read_mismatch
-
-bb73:                                             ; preds = %bb72
-  call void %7(i64* null, i32 %5) nounwind
-  unreachable
-
-bb74:                                             ; preds = %bb75
-  %12 = add i32 %13, 1                            ; <i32> [#uses=1]
-  br label %bb75
-
-bb75:                                             ; preds = %bb74, %bb65
-  %13 = phi i32 [ %12, %bb74 ], [ 0, %bb65 ]      ; <i32> [#uses=2]
-  %tmp386 = add i32 0, 27328512                   ; <i32> [#uses=1]
-  %14 = shl i32 1, %13                            ; <i32> [#uses=1]
-  %15 = load i32* undef, align 4                  ; <i32> [#uses=1]
-  %16 = and i32 %15, %14                          ; <i32> [#uses=1]
-  %17 = icmp eq i32 %16, 0                        ; <i1> [#uses=1]
-  br i1 %17, label %bb74, label %bb71
-
-bb80:                                             ; preds = %bb78.preheader
-  unreachable
-
-read_fatal:                                       ; preds = %read_mismatch, %bb3.i156, %bb58
-  br i1 undef, label %return, label %bb25
-
-rewrite:                                          ; preds = %bb60, %bb51
-  store i32 -1, i32* getelementptr inbounds (%struct.__gcov_var* @__gcov_var, i32 0, i32 6), align 4
-  br i1 undef, label %bb94, label %bb119.preheader
-
-bb94:                                             ; preds = %rewrite
-  unreachable
-
-bb119.preheader:                                  ; preds = %rewrite
-  br i1 undef, label %read_mismatch, label %bb98
-
-bb98:                                             ; preds = %bb119.preheader
-  br label %read_mismatch
-
-return:                                           ; preds = %read_fatal, %entry
-  ret void
-
-bb49.1:                                           ; preds = %bb48, %bb25
-  br i1 undef, label %bb49.2, label %bb48.2
-
-bb49.2:                                           ; preds = %bb48.2, %bb49.1
-  br i1 undef, label %bb49.3, label %bb48.3
-
-bb48.2:                                           ; preds = %bb49.1
-  br label %bb49.2
-
-bb49.3:                                           ; preds = %bb48.3, %bb49.2
-  br i1 undef, label %bb51, label %bb48.4
-
-bb48.3:                                           ; preds = %bb49.2
-  br label %bb49.3
-
-bb48.4:                                           ; preds = %bb49.3
-  br label %bb51
-}
-
-declare i32 @__gcov_read_unsigned() nounwind
-
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index ec4e42d..a2cf170 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,21 +1,23 @@
 ; RUN: llc < %s -march=ppc32 | grep lwarx  | count 3
 ; RUN: llc < %s -march=ppc32 | grep stwcx. | count 4
 
-define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind  {
-	%tmp = call i32 @llvm.atomic.load.add.i32( i32* %mem, i32 %val )
-	ret i32 %tmp
+define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
+  %tmp = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %mem, i32 %val)
+  ret i32 %tmp
 }
 
-define i32 @exchange_and_cmp(i32* %mem) nounwind  {
-       	%tmp = call i32 @llvm.atomic.cmp.swap.i32( i32* %mem, i32 0, i32 1 )
-	ret i32 %tmp
+define i32 @exchange_and_cmp(i32* %mem) nounwind {
+  %tmp = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %mem, i32 0, i32 1)
+  ret i32 %tmp
 }
 
-define i32 @exchange(i32* %mem, i32 %val) nounwind  {
-	%tmp = call i32 @llvm.atomic.swap.i32( i32* %mem, i32 1 )
-	ret i32 %tmp
+define i32 @exchange(i32* %mem, i32 %val) nounwind {
+  %tmp = call i32 @llvm.atomic.swap.i32.p0i32(i32* %mem, i32 1)
+  ret i32 %tmp
 }
 
-declare i32 @llvm.atomic.load.add.i32(i32*, i32) nounwind 
-declare i32 @llvm.atomic.cmp.swap.i32(i32*, i32, i32) nounwind 
-declare i32 @llvm.atomic.swap.i32(i32*, i32) nounwind 
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* nocapture, i32, i32) nounwind
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32* nocapture, i32) nounwind
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 6d9daef..0fa2a29 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,21 +1,23 @@
 ; RUN: llc < %s -march=ppc64 | grep ldarx  | count 3
 ; RUN: llc < %s -march=ppc64 | grep stdcx. | count 4
 
-define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind  {
-	%tmp = call i64 @llvm.atomic.load.add.i64( i64* %mem, i64 %val )
-	ret i64 %tmp
+define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
+  %tmp = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %mem, i64 %val)
+  ret i64 %tmp
 }
 
-define i64 @exchange_and_cmp(i64* %mem) nounwind  {
-       	%tmp = call i64 @llvm.atomic.cmp.swap.i64( i64* %mem, i64 0, i64 1 )
-	ret i64 %tmp
+define i64 @exchange_and_cmp(i64* %mem) nounwind {
+  %tmp = call i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* %mem, i64 0, i64 1)
+  ret i64 %tmp
 }
 
-define i64 @exchange(i64* %mem, i64 %val) nounwind  {
-	%tmp = call i64 @llvm.atomic.swap.i64( i64* %mem, i64 1 )
-	ret i64 %tmp
+define i64 @exchange(i64* %mem, i64 %val) nounwind {
+  %tmp = call i64 @llvm.atomic.swap.i64.p0i64(i64* %mem, i64 1)
+  ret i64 %tmp
 }
 
-declare i64 @llvm.atomic.load.add.i64(i64*, i64) nounwind 
-declare i64 @llvm.atomic.cmp.swap.i64(i64*, i64, i64) nounwind 
-declare i64 @llvm.atomic.swap.i64(i64*, i64) nounwind 
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* nocapture, i64, i64) nounwind
+
+declare i64 @llvm.atomic.swap.i64.p0i64(i64* nocapture, i64) nounwind
diff --git a/test/CodeGen/PowerPC/invalid-memcpy.ll b/test/CodeGen/PowerPC/invalid-memcpy.ll
deleted file mode 100644
index 3b1f306..0000000
--- a/test/CodeGen/PowerPC/invalid-memcpy.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=ppc32
-; RUN: llc < %s -march=ppc64
-
-; This testcase is invalid (the alignment specified for memcpy is 
-; greater than the alignment guaranteed for Qux or C.0.1173, but it
-; should compile, not crash the code generator.
-
-@C.0.1173 = external constant [33 x i8]         ; <[33 x i8]*> [#uses=1]
-
-define void @Bork() {
-entry:
-        %Qux = alloca [33 x i8]         ; <[33 x i8]*> [#uses=1]
-        %Qux1 = bitcast [33 x i8]* %Qux to i8*          ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i64( i8* %Qux1, i8* getelementptr ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8 )
-        ret void
-}
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-
diff --git a/test/CodeGen/PowerPC/multiple-return-values.ll b/test/CodeGen/PowerPC/multiple-return-values.ll
deleted file mode 100644
index b9317f9..0000000
--- a/test/CodeGen/PowerPC/multiple-return-values.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=ppc32
-; RUN: llc < %s -march=ppc64
-
-define {i64, float} @bar(i64 %a, float %b) {
-        %y = add i64 %a, 7
-        %z = fadd float %b, 7.0
-	ret i64 %y, float %z
-}
-
-define i64 @foo() {
-	%M = call {i64, float} @bar(i64 21, float 21.0)
-        %N = getresult {i64, float} %M, 0
-        %O = getresult {i64, float} %M, 1
-        %P = fptosi float %O to i64
-        %Q = add i64 %P, %N
-	ret i64 %Q
-}
diff --git a/test/CodeGen/PowerPC/ppc64-32bit-addic.ll b/test/CodeGen/PowerPC/ppc64-32bit-addic.ll
new file mode 100644
index 0000000..4d323da
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-32bit-addic.ll
@@ -0,0 +1,29 @@
+; Check that the ADDIC optimizations are not applied on PPC64
+; RUN: llc < %s | FileCheck %s
+; ModuleID = 'os_unix.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd9.0"
+
+define i32 @notZero(i32 %call) nounwind {
+entry:
+; CHECK-NOT: addic
+  %not.tobool = icmp ne i32 %call, 0
+  %. = zext i1 %not.tobool to i32
+  ret i32 %.
+}
+
+define i32 @isMinusOne(i32 %call) nounwind {
+entry:
+; CHECK-NOT: addic
+  %not.tobool = icmp eq i32 %call, -1
+  %. = zext i1 %not.tobool to i32
+  ret i32 %.
+}
+
+define i32 @isNotMinusOne(i32 %call) nounwind {
+entry:
+; CHECK-NOT: addic
+  %not.tobool = icmp ne i32 %call, -1
+  %. = zext i1 %not.tobool to i32
+  ret i32 %.
+}
diff --git a/test/CodeGen/PowerPC/ppc64-crash.ll b/test/CodeGen/PowerPC/ppc64-crash.ll
new file mode 100644
index 0000000..073c322
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-crash.ll
@@ -0,0 +1,14 @@
+; RUN: llc %s -o -
+
+; ModuleID = 'undo.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd"
+
+%struct.__sFILE = type {}
+%struct.pos_T = type { i64 }
+
+; check that we're not copying stuff between R and X registers
+define internal void @serialize_pos(%struct.pos_T* byval %pos, %struct.__sFILE* %fp) nounwind {
+entry:
+  ret void
+}
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index f091aa6..f06ccd0 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -6,8 +6,6 @@
 ; RUN: grep .baz: %t
 ; RUN: grep ld.*\.baz %t
 
-declare void @foo()
-
 define private void @foo() {
         ret void
 }
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index cd35be6..f3f0834 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -60,7 +60,7 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
 	%tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
 	%tmp6.len = call i32 @strlen( i8* %tmp6 )
 	%tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
-	call void @llvm.memcpy.i32( i8* %tmp6.indexed, i8* getelementptr ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1 )
+	call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
 	%tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
 	call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
 	ret void
@@ -72,6 +72,6 @@ declare i8* @strcat(i8*, i8*)
 
 declare fastcc void @comment_add(%struct.comment*, i8*)
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 declare i8* @strcpy(i8*, i8*)
diff --git a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
index 41b3029..0b56103 100644
--- a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
 
-	type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16)*, i32 }		; type %0
-	type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*)*, i32 }		; type %1
-	type { void (%"struct.xalanc_1_8::FormatterToXML"*, %"struct.xalanc_1_8::XalanDOMString"*)*, i32 }		; type %2
-	type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32, i32)*, i32 }		; type %3
-	type { void (%"struct.xalanc_1_8::FormatterToXML"*)*, i32 }		; type %4
+	%0 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16)*, i32 }		; type %0
+	%1 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*)*, i32 }		; type %1
+	%2 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, %"struct.xalanc_1_8::XalanDOMString"*)*, i32 }		; type %2
+	%3 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32, i32)*, i32 }		; type %3
+	%4 = type { void (%"struct.xalanc_1_8::FormatterToXML"*)*, i32 }		; type %4
 	%"struct.std::CharVectorType" = type { %"struct.std::_Vector_base<char,std::allocator<char> >" }
 	%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
 	%"struct.std::_Bit_iterator_base" = type { i32*, i32 }
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
index b832637..28ac28b 100644
--- a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
 
-	type { %struct.GAP }		; type %0
-	type { i16, i8, i8 }		; type %1
-	type { [2 x i32], [2 x i32] }		; type %2
-	type { %struct.rec* }		; type %3
-	type { i8, i8, i16, i8, i8, i8, i8 }		; type %4
+	%0 = type { %struct.GAP }		; type %0
+	%1 = type { i16, i8, i8 }		; type %1
+	%2 = type { [2 x i32], [2 x i32] }		; type %2
+	%3 = type { %struct.rec* }		; type %3
+	%4 = type { i8, i8, i16, i8, i8, i8, i8 }		; type %4
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.FILE_POS = type { i8, i8, i16, i32 }
 	%struct.FIRST_UNION = type { %struct.FILE_POS }
diff --git a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
index 02fad4b..88accf8 100644
--- a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -1,11 +1,11 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -O3
 
-	type { i16, i8, i8 }		; type %0
-	type { [2 x i32], [2 x i32] }		; type %1
-	type { %struct.GAP }		; type %2
-	type { %struct.rec* }		; type %3
-	type { i8, i8, i16, i8, i8, i8, i8 }		; type %4
-	type { i8, i8, i8, i8 }		; type %5
+	%0 = type { i16, i8, i8 }		; type %0
+	%1 = type { [2 x i32], [2 x i32] }		; type %1
+	%2 = type { %struct.GAP }		; type %2
+	%3 = type { %struct.rec* }		; type %3
+	%4 = type { i8, i8, i16, i8, i8, i8, i8 }		; type %4
+	%5 = type { i8, i8, i8, i8 }		; type %5
 	%struct.COMPOSITE = type { i8, i16, i16 }
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.FILE_POS = type { i8, i8, i16, i32 }
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
index bfea003..779e100 100644
--- a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | not grep fcpys
 ; rdar://7117307
 
@@ -13,7 +12,7 @@ entry:
 	br i1 undef, label %bb, label %bb6.preheader
 
 bb6.preheader:		; preds = %entry
-	call void @llvm.memcpy.i32(i8* undef, i8* undef, i32 12, i32 4)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 12, i32 4, i1 false)
 	br i1 undef, label %bb15, label %bb13
 
 bb:		; preds = %entry
@@ -31,4 +30,4 @@ bb15:		; preds = %bb13, %bb6.preheader
 	ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index e1932bd..4597ba5 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -22,7 +22,7 @@
 
 define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind {
 entry:
-; CHECK:       ldr.w	{{(r[0-9]+)|(lr)}}, [r7, #28]
+; CHECK:       ldr{{(.w)?}}	{{(r[0-9]+)|(lr)}}, [r7, #28]
   %xgaps.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   %ycomp.i = alloca [32 x %union.rec*], align 4   ; <[32 x %union.rec*]*> [#uses=0]
   br label %bb20
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.ll b/test/CodeGen/X86/2004-02-12-Memcpy.ll
deleted file mode 100644
index f15a1b4..0000000
--- a/test/CodeGen/X86/2004-02-12-Memcpy.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
-
-@A = global [32 x i32] zeroinitializer
-@B = global [32 x i32] zeroinitializer
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define void @main() nounwind {
-  ; dword copy
-  call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
-                           i8* bitcast ([32 x i32]* @B to i8*),
-                           i32 128, i32 4 )
-
-  ; word copy
-  call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
-                           i8* bitcast ([32 x i32]* @B to i8*),
-                           i32 128, i32 2 )
-
-  ; byte copy
-  call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
-                           i8* bitcast ([32 x i32]* @B to i8*),
-                            i32 128, i32 1 )
-
-  ret void
-}
diff --git a/test/CodeGen/X86/2006-11-28-Memcpy.ll b/test/CodeGen/X86/2006-11-28-Memcpy.ll
deleted file mode 100644
index 8c1573f..0000000
--- a/test/CodeGen/X86/2006-11-28-Memcpy.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; PR1022, PR1023
-; RUN: llc < %s -march=x86 | grep -- -573785174 | count 2
-; RUN: llc < %s -march=x86 | grep -E {movl	_?bytes2} | count 1
-
-@fmt = constant [4 x i8] c"%x\0A\00"            ; <[4 x i8]*> [#uses=2]
-@bytes = constant [4 x i8] c"\AA\BB\CC\DD"              ; <[4 x i8]*> [#uses=1]
-@bytes2 = global [4 x i8] c"\AA\BB\CC\DD"               ; <[4 x i8]*> [#uses=1]
-
-define i32 @test1() nounwind {
-        %y = alloca i32         ; <i32*> [#uses=2]
-        %c = bitcast i32* %y to i8*             ; <i8*> [#uses=1]
-        %z = getelementptr [4 x i8]* @bytes, i32 0, i32 0               ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* %c, i8* %z, i32 4, i32 1 )
-        %r = load i32* %y               ; <i32> [#uses=1]
-        %t = bitcast [4 x i8]* @fmt to i8*              ; <i8*> [#uses=1]
-        %tmp = call i32 (i8*, ...)* @printf( i8* %t, i32 %r )           ; <i32> [#uses=0]
-        ret i32 0
-}
-
-define void @test2() nounwind {
-        %y = alloca i32         ; <i32*> [#uses=2]
-        %c = bitcast i32* %y to i8*             ; <i8*> [#uses=1]
-        %z = getelementptr [4 x i8]* @bytes2, i32 0, i32 0              ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* %c, i8* %z, i32 4, i32 1 )
-        %r = load i32* %y               ; <i32> [#uses=1]
-        %t = bitcast [4 x i8]* @fmt to i8*              ; <i8*> [#uses=1]
-        %tmp = call i32 (i8*, ...)* @printf( i8* %t, i32 %r )           ; <i32> [#uses=0]
-        ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare i32 @printf(i8*, ...)
-
diff --git a/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
index 9580726..3bd6d59 100644
--- a/test/CodeGen/X86/2007-03-16-InlineAsm.ll
+++ b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -9,7 +9,6 @@ entry:
 	%retval = alloca i32, align 4		; <i32*> [#uses=2]
 	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
 	%ret = alloca i32, align 4		; <i32*> [#uses=2]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %A, i32* %A_addr
 	store i32 %B, i32* %B_addr
 	%tmp1 = load i32* %A_addr		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index 0f49d2e..d1fc70d 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -3,35 +3,35 @@
 ; CHECK: .cfi_personality 0, __gnat_eh_personality
 ; CHECK: .cfi_lsda 0, .Lexception0
 
-@error = external global i8		; <i8*> [#uses=2]
+@error = external global i8
 
 define void @_ada_x() {
 entry:
-	invoke void @raise( )
-			to label %eh_then unwind label %unwind
-
-unwind:		; preds = %entry
-	%eh_ptr = tail call i8* @llvm.eh.exception( )		; <i8*> [#uses=2]
-	%eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error )		; <i32> [#uses=1]
-	%eh_typeid = tail call i32 @llvm.eh.typeid.for.i32( i8* @error )		; <i32> [#uses=1]
-	%tmp2 = icmp eq i32 %eh_select, %eh_typeid		; <i1> [#uses=1]
-	br i1 %tmp2, label %eh_then, label %Unwind
-
-eh_then:		; preds = %unwind, %entry
-	ret void
-
-Unwind:		; preds = %unwind
-	tail call i32 (...)* @_Unwind_Resume( i8* %eh_ptr )		; <i32>:0 [#uses=0]
-	unreachable
+  invoke void @raise()
+          to label %eh_then unwind label %unwind
+
+unwind:                                           ; preds = %entry
+  %eh_ptr = tail call i8* @llvm.eh.exception()
+  %eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error)
+  %eh_typeid = tail call i32 @llvm.eh.typeid.for(i8* @error)
+  %tmp2 = icmp eq i32 %eh_select, %eh_typeid
+  br i1 %tmp2, label %eh_then, label %Unwind
+
+eh_then:                                          ; preds = %unwind, %entry
+  ret void
+
+Unwind:                                           ; preds = %unwind
+  %0 = tail call i32 (...)* @_Unwind_Resume(i8* %eh_ptr)
+  unreachable
 }
 
 declare void @raise()
 
-declare i8* @llvm.eh.exception()
+declare i8* @llvm.eh.exception() nounwind readonly
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...)
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
 
-declare i32 @llvm.eh.typeid.for.i32(i8*)
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gnat_eh_personality(...)
 
diff --git a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
deleted file mode 100644
index 36a97ef..0000000
--- a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
+++ /dev/null
@@ -1,129 +0,0 @@
-; PR1495
-; RUN: llc < %s -march=x86
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-pc-linux-gnu"
-	%struct.AVRational = type { i32, i32 }
-	%struct.FFTComplex = type { float, float }
-	%struct.FFTContext = type { i32, i32, i16*, %struct.FFTComplex*, %struct.FFTComplex*, void (%struct.FFTContext*, %struct.FFTComplex*)*, void (%struct.MDCTContext*, float*, float*, float*)* }
-	%struct.MDCTContext = type { i32, i32, float*, float*, %struct.FFTContext }
-	%struct.Minima = type { i32, i32, i32, i32 }
-	%struct.codebook_t = type { i32, i8*, i32*, i32, float, float, i32, i32, i32*, float*, float* }
-	%struct.floor_class_t = type { i32, i32, i32, i32* }
-	%struct.floor_t = type { i32, i32*, i32, %struct.floor_class_t*, i32, i32, i32, %struct.Minima* }
-	%struct.mapping_t = type { i32, i32*, i32*, i32*, i32, i32*, i32* }
-	%struct.residue_t = type { i32, i32, i32, i32, i32, i32, [8 x i8]*, [2 x float]* }
-	%struct.venc_context_t = type { i32, i32, [2 x i32], [2 x %struct.MDCTContext], [2 x float*], i32, float*, float*, float*, float*, float, i32, %struct.codebook_t*, i32, %struct.floor_t*, i32, %struct.residue_t*, i32, %struct.mapping_t*, i32, %struct.AVRational* }
-
-define fastcc i32 @put_main_header(%struct.venc_context_t* %venc, i8** %out) {
-entry:
-	br i1 false, label %bb1820, label %bb288.bb148_crit_edge
-
-bb288.bb148_crit_edge:		; preds = %entry
-	ret i32 0
-
-cond_next1712:		; preds = %bb1820.bb1680_crit_edge
-	ret i32 0
-
-bb1817:		; preds = %bb1820.bb1680_crit_edge
-	br label %bb1820
-
-bb1820:		; preds = %bb1817, %entry
-	%pb.1.50 = phi i32 [ %tmp1693, %bb1817 ], [ 8, %entry ]		; <i32> [#uses=3]
-	br i1 false, label %bb2093, label %bb1820.bb1680_crit_edge
-
-bb1820.bb1680_crit_edge:		; preds = %bb1820
-	%tmp1693 = add i32 %pb.1.50, 8		; <i32> [#uses=2]
-	%tmp1702 = icmp slt i32 %tmp1693, 0		; <i1> [#uses=1]
-	br i1 %tmp1702, label %cond_next1712, label %bb1817
-
-bb2093:		; preds = %bb1820
-	%tmp2102 = add i32 %pb.1.50, 65		; <i32> [#uses=0]
-	%tmp2236 = add i32 %pb.1.50, 72		; <i32> [#uses=1]
-	%tmp2237 = sdiv i32 %tmp2236, 8		; <i32> [#uses=2]
-	br i1 false, label %bb2543, label %bb2536.bb2396_crit_edge
-
-bb2536.bb2396_crit_edge:		; preds = %bb2093
-	ret i32 0
-
-bb2543:		; preds = %bb2093
-	br i1 false, label %cond_next2576, label %bb2690
-
-cond_next2576:		; preds = %bb2543
-	ret i32 0
-
-bb2682:		; preds = %bb2690
-	ret i32 0
-
-bb2690:		; preds = %bb2543
-	br i1 false, label %bb2682, label %bb2698
-
-bb2698:		; preds = %bb2690
-	br i1 false, label %cond_next2726, label %bb2831
-
-cond_next2726:		; preds = %bb2698
-	ret i32 0
-
-bb2831:		; preds = %bb2698
-	br i1 false, label %cond_next2859, label %bb2964
-
-cond_next2859:		; preds = %bb2831
-	br i1 false, label %bb2943, label %cond_true2866
-
-cond_true2866:		; preds = %cond_next2859
-	br i1 false, label %cond_true2874, label %cond_false2897
-
-cond_true2874:		; preds = %cond_true2866
-	ret i32 0
-
-cond_false2897:		; preds = %cond_true2866
-	ret i32 0
-
-bb2943:		; preds = %cond_next2859
-	ret i32 0
-
-bb2964:		; preds = %bb2831
-	br i1 false, label %cond_next2997, label %bb4589
-
-cond_next2997:		; preds = %bb2964
-	ret i32 0
-
-bb3103:		; preds = %bb4589
-	ret i32 0
-
-bb4589:		; preds = %bb2964
-	br i1 false, label %bb3103, label %bb4597
-
-bb4597:		; preds = %bb4589
-	br i1 false, label %cond_next4630, label %bb4744
-
-cond_next4630:		; preds = %bb4597
-	br i1 false, label %bb4744, label %cond_true4724
-
-cond_true4724:		; preds = %cond_next4630
-	br i1 false, label %bb4736, label %bb7531
-
-bb4736:		; preds = %cond_true4724
-	ret i32 0
-
-bb4744:		; preds = %cond_next4630, %bb4597
-	ret i32 0
-
-bb7531:		; preds = %cond_true4724
-	%v_addr.023.0.i6 = add i32 %tmp2237, -255		; <i32> [#uses=1]
-	br label %bb.i14
-
-bb.i14:		; preds = %bb.i14, %bb7531
-	%n.021.0.i8 = phi i32 [ 0, %bb7531 ], [ %indvar.next, %bb.i14 ]		; <i32> [#uses=2]
-	%tmp..i9 = mul i32 %n.021.0.i8, -255		; <i32> [#uses=1]
-	%tmp5.i11 = add i32 %v_addr.023.0.i6, %tmp..i9		; <i32> [#uses=1]
-	%tmp10.i12 = icmp ugt i32 %tmp5.i11, 254		; <i1> [#uses=1]
-	%indvar.next = add i32 %n.021.0.i8, 1		; <i32> [#uses=1]
-	br i1 %tmp10.i12, label %bb.i14, label %bb12.loopexit.i18
-
-bb12.loopexit.i18:		; preds = %bb.i14
-	call void @llvm.memcpy.i32( i8* null, i8* null, i32 %tmp2237, i32 1 )
-	ret i32 0
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
index c68628d..2b56b4e 100644
--- a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -362,7 +362,7 @@ bb1159:		; preds = %cond_next1150
 
 cond_true1169:		; preds = %bb1159
 	%tmp11741175 = trunc i64 %lsum.11225.0 to i32		; <i32> [#uses=1]
-	%tmp1178 = tail call i32 (%struct._IO_FILE* noalias , i8* noalias , ...)* @fprintf( %struct._IO_FILE* noalias %file  , i8* getelementptr ([49 x i8]* @.str32, i32 0, i64 0)  , i32 %tmp11741175, i32 0 )		; <i32> [#uses=0]
+	%tmp1178 = tail call i32 (%struct._IO_FILE*  , i8*  , ...)* @fprintf( %struct._IO_FILE* noalias %file  , i8* getelementptr ([49 x i8]* @.str32, i32 0, i64 0)  , i32 %tmp11741175, i32 0 )		; <i32> [#uses=0]
 	ret void
 
 UnifiedReturnBlock:		; preds = %bb1159
diff --git a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
deleted file mode 100644
index 236b7cd..0000000
--- a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
-
-	%struct.argument_t = type { i8*, %struct.argument_t*, i32, %struct.ipc_type_t*, i32, void (...)*, void (...)*, void (...)*, void (...)*, void (...)*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, %struct.routine*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, i32, i32, i32, i32, i32, i32 }
-	%struct.ipc_type_t = type { i8*, %struct.ipc_type_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, %struct.ipc_type_t*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
-	%struct.routine = type opaque
-@"\01LC" = external constant [11 x i8]		; <[11 x i8]*> [#uses=1]
-
-define i8* @InArgMsgField(%struct.argument_t* %arg, i8* %str) nounwind  {
-entry:
-	%who = alloca [20 x i8]		; <[20 x i8]*> [#uses=1]
-	%who1 = getelementptr [20 x i8]* %who, i32 0, i32 0		; <i8*> [#uses=2]
-	call void @llvm.memset.i32( i8* %who1, i8 0, i32 20, i32 1 )
-	call void @llvm.memcpy.i32( i8* %who1, i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i32 11, i32 1 )
-	unreachable
-}
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind 
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index 3d0766c..a57f716 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -11,12 +11,12 @@ target triple = "i386-pc-linux-gnu"
   
 define void @foo(i32 %t) nounwind {
   %tmp1210 = alloca i8, i32 32, align 4
-  call void @llvm.memset.i64(i8* %tmp1210, i8 0, i64 32, i32 4)
-  
+  call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
   %x = alloca i8, i32 %t
   call void @dummy(i8* %x)
   ret void
 }
 
-declare void @dummy(i8* %x)
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
+declare void @dummy(i8*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
index 0583ef1..8406c4a 100644
--- a/test/CodeGen/X86/2009-01-25-NoSSE.ll
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -3,18 +3,18 @@
 target datalayout =
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
-        %struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 }
+
+%struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 }
 
 define void @foo() nounwind {
 entry:
-        %termios = alloca %struct.ktermios, align 8
-        %termios1 = bitcast %struct.ktermios* %termios to i8*
-        call void @llvm.memset.i64(i8* %termios1, i8 0, i64 44, i32 8)
-        call void @bar(%struct.ktermios* %termios) nounwind
-        ret void
+  %termios = alloca %struct.ktermios, align 8
+  %termios1 = bitcast %struct.ktermios* %termios to i8*
+  call void @llvm.memset.p0i8.i64(i8* %termios1, i8 0, i64 44, i32 8, i1 false)
+  call void @bar(%struct.ktermios* %termios) nounwind
+  ret void
 }
 
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
 declare void @bar(%struct.ktermios*)
 
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
deleted file mode 100644
index 97bbd93..0000000
--- a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; RUN: llc < %s 
-; rdar://6774324
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin10.0"
-	type <{ i32, %1 }>		; type %0
-	type <{ [216 x i8] }>		; type %1
-	type <{ %3, %4*, %28*, i64, i32, %6, %6, i32, i32, i32, i32, void (i8*, i32)*, i8*, %29*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i8*], i32, %30, i32, %24, %4*, %4*, i64, i64, i32, i32, void (i32, %2*)*, i32, i32, i32, i32, i32, i32, i32, i32, %24, i64, i64, i64, i64, i64, %21, i32, i32, %21, i32, %31*, %3, %33, %34, %9*, i32, i32, %3, %3, %35, %41*, %42*, %11, i32, i32, i32, i8, i8, i8, i8, %69*, %69, %9*, %9*, [11 x %61], %3, i8*, i32, i64, i64, i32, i32, i32, i64 }>		; type %2
-	type <{ %3*, %3* }>		; type %3
-	type <{ %3, i32, %2*, %2*, %2*, %5*, i32, i32, %21, i64, i64, i64, i32, %22, %9*, %6, %4*, %23 }>		; type %4
-	type <{ %3, %3, %4*, %4*, i32, %6, %9*, %9*, %5*, %20* }>		; type %5
-	type <{ %7, i16, i8, i8, %8 }>		; type %6
-	type <{ i32 }>		; type %7
-	type <{ i8*, i8*, [2 x i32], i16, i8, i8, i8*, i8, i8, i8, i8, i8* }>		; type %8
-	type <{ %10, %13, %15, i32, i32, i32, i32, %9*, %9*, %16*, i32, %17*, i64, i32 }>		; type %9
-	type <{ i32, i32, %11 }>		; type %10
-	type <{ %12 }>		; type %11
-	type <{ [12 x i8] }>		; type %12
-	type <{ %14 }>		; type %13
-	type <{ [40 x i8] }>		; type %14
-	type <{ [4 x i8] }>		; type %15
-	type <{ %15, %15 }>		; type %16
-	type <{ %17*, %17*, %9*, i32, %18*, %19* }>		; type %17
-	type opaque		; type %18
-	type <{ i32, i32, %9*, %9*, i32, i32 }>		; type %19
-	type <{ %5*, %20*, %20*, %20* }>		; type %20
-	type <{ %3, %3*, void (i8*, i8*)*, i8*, i8*, i64 }>		; type %21
-	type <{ i32, [4 x i32], i32, i32, [128 x %3] }>		; type %22
-	type <{ %24, %24, %24, %24*, %24*, %24*, %25, %26, %27, i32, i32, i8* }>		; type %23
-	type <{ i64, i32, i32, i32 }>		; type %24
-	type <{ i32, i32 }>		; type %25
-	type <{ i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32 }>		; type %26
-	type <{ [16 x %17*], i32 }>		; type %27
-	type <{ i8, i8, i8, i8, %7, %3 }>		; type %28
-	type <{ i32, %11*, i8*, i8*, %11* }>		; type %29
-	type <{ i32, i32, i32, i32, i64 }>		; type %30
-	type <{ %32*, %3, %3, i32, i32, i32, %5* }>		; type %31
-	type opaque		; type %32
-	type <{ [44 x i8] }>		; type %33
-	type <{ %17* }>		; type %34
-	type <{ %36, %36*, i32, [4 x %40], i32, i32, i64, i32 }>		; type %35
-	type <{ i8*, %0*, %37*, i64, %39, i32, %39, %6, i64, i64, i8*, i32 }>		; type %36
-	type <{ i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, %38 }>		; type %37
-	type <{ i16, i16, i8, i8, i16, i32, i16, i16, i32, i16, i16, i32, i32, [8 x [8 x i16]], [8 x [16 x i16]], [96 x i8] }>		; type %38
-	type <{ i8, i8, i8, i8, i8, i8, i8, i8 }>		; type %39
-	type <{ i64 }>		; type %40
-	type <{ %11, i32, i32, i32, %42*, %3, i8*, %3, %5*, %32*, i32, i32, i32, i32, i32, i32, i32, %59, %60, i64, i64, i32, %11, %9*, %9*, %9*, [11 x %61], %9*, %9*, %9*, %9*, %9*, [3 x %9*], %62*, %3, %3, i32, i32, %9*, %9*, i32, %67*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, %68*, [2 x i32], i64, i64, i32 }>		; type %41
-	type <{ %43, %44, %47*, i64, i64, i64, i32, %11, %54, %46*, %46*, i32, i32, i32, i32, i32, i32, i32 }>		; type %42
-	type <{ i16, i8, i8, i32, i32 }>		; type %43
-	type <{ %45, i32, i32 }>		; type %44
-	type <{ %46*, %46*, i64, i64 }>		; type %45
-	type <{ %45, %15, i64, i8, i8, i8, i8, i16, i16 }>		; type %46
-	type <{ i64*, i64, %48*, i32, i32, i32, %6, %53, i32, i64, i64*, i64*, %48*, %48*, %48*, i32 }>		; type %47
-	type <{ %3, %43, i64, %49*, i32, i32, i32, i32, %48*, %48*, i64, %50*, i64, %52*, i32, i16, i16, i8, i8, i8, i8, %3, %3, i64, i32, i32, i32, i8*, i32, i8, i8, i8, i8, %3 }>		; type %48
-	type <{ %3, %3, %49*, %48*, i64, i8, i8, i8, i8, i32, i8, i8, i8, i8 }>		; type %49
-	type <{ i32, %51* }>		; type %50
-	type <{ void (%50*)*, void (%50*)*, i32 (%50*, %52*, i32)*, i32 (%50*)*, i32 (%50*, i64, i32, i32, i32*)*, i32 (%50*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%50*, i64, i32)*, i32 (%50*, i64, i64, i32)*, i32 (%50*, i64, i64, i32)*, i32 (%50*, i32)*, i32 (%50*)*, i8* }>		; type %51
-	type <{ i32, %48* }>		; type %52
-	type <{ i32, i32, i32 }>		; type %53
-	type <{ %11, %55*, i32, %53, i64 }>		; type %54
-	type <{ %3, i32, i32, i32, i32, i32, [64 x i8], %56 }>		; type %55
-	type <{ %57, %58, %58 }>		; type %56
-	type <{ i64, i64, i64, i64, i64 }>		; type %57
-	type <{ i64, i64, i64, i64, i64, i64, i64, i64 }>		; type %58
-	type <{ [2 x i32] }>		; type %59
-	type <{ [8 x i32] }>		; type %60
-	type <{ %9*, i32, i32, i32 }>		; type %61
-	type <{ %11, i32, %11, i32, i32, %63*, i32, %64*, %65, i32, i32, i32, i32, %41* }>		; type %62
-	type <{ %10*, i32, %15, %15 }>		; type %63
-	type opaque		; type %64
-	type <{ i32, %66*, %66*, %66**, %66*, %66** }>		; type %65
-	type <{ %63, i32, %62*, %66*, %66* }>		; type %66
-	type <{ i32, i32, [0 x %39] }>		; type %67
-	type opaque		; type %68
-	type <{ %69*, void (%69*, %2*)* }>		; type %69
-	type <{ %70*, %2*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i32, %71, i32, i32, i64, i64, i64, %72, i8*, i8*, %73, %4*, %79*, %81*, %39*, %84, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i32, i64*, i32, i64*, i8*, i32, [256 x i32], i64, i64, %86, %77*, i64, i64, %88*, %2*, %2* }>		; type %70
-	type <{ %3, i64, i32, i32 }>		; type %71
-	type <{ i64, i64, i64 }>		; type %72
-	type <{ %73*, %73*, %73*, %73*, %74*, %75*, %76*, %70*, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, [3 x %78*], i8*, i8* }>		; type %73
-	type <{ %74*, %74*, %75*, %76*, %73*, i32, i32, i32, i32, i32, i8*, i8* }>		; type %74
-	type <{ %75*, %73*, %74*, %76*, i32, i32, i32, i32, %78*, i8*, i8* }>		; type %75
-	type <{ %76*, %73*, %74*, %75*, i32, i32, i32, i32, i8*, i8*, %77* }>		; type %76
-	type opaque		; type %77
-	type <{ %78*, %75*, i8, i8, i8, i8, i16, i16, i16, i8, i8, i32, [0 x %73*] }>		; type %78
-	type <{ i32, i32, i32, [20 x %80] }>		; type %79
-	type <{ i64*, i8* }>		; type %80
-	type <{ [256 x %39], [19 x %39], i8, i8, i8, i8, i8, i8, i8, i8, %82, i8, i8, i8, i8, i8, i8, i8, i8, %82, %83 }>		; type %81
-	type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16 }>		; type %82
-	type <{ [16 x i64], i64 }>		; type %83
-	type <{ %82*, %85, %85, %39*, i32 }>		; type %84
-	type <{ i16, %39* }>		; type %85
-	type <{ %87, i8* }>		; type %86
-	type <{ i32, i32, i32, i8, i8, i16, i32, i32, i32, i32, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>		; type %87
-	type <{ i64, i64, i32, i32, i32, i32 }>		; type %88
-	type <{ i32, i32, i32, i32, i32, i32, i32 }>		; type %89
-@kernel_stack_size = external global i32		; <i32*> [#uses=1]
-
-define void @test(%0*) nounwind {
-	%2 = tail call %2* asm sideeffect "mov %gs:${1:P},$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 ptrtoint (%2** getelementptr (%70* null, i32 0, i32 1) to i32)) nounwind		; <%2*> [#uses=1]
-	%3 = getelementptr %2* %2, i32 0, i32 15		; <i32*> [#uses=1]
-	%4 = load i32* %3		; <i32> [#uses=2]
-	%5 = icmp eq i32 %4, 0		; <i1> [#uses=1]
-	br i1 %5, label %47, label %6
-
-; <label>:6		; preds = %1
-	%7 = load i32* @kernel_stack_size		; <i32> [#uses=1]
-	%8 = add i32 %7, %4		; <i32> [#uses=1]
-	%9 = inttoptr i32 %8 to %89*		; <%89*> [#uses=12]
-	%10 = tail call %2* asm sideeffect "mov %gs:${1:P},$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 ptrtoint (%2** getelementptr (%70* null, i32 0, i32 1) to i32)) nounwind		; <%2*> [#uses=1]
-	%11 = getelementptr %2* %10, i32 0, i32 65, i32 1		; <%36**> [#uses=1]
-	%12 = load %36** %11		; <%36*> [#uses=1]
-	%13 = getelementptr %36* %12, i32 0, i32 1		; <%0**> [#uses=1]
-	%14 = load %0** %13		; <%0*> [#uses=1]
-	%15 = icmp eq %0* %14, %0		; <i1> [#uses=1]
-	br i1 %15, label %40, label %16
-
-; <label>:16		; preds = %6
-	%17 = getelementptr %0* %0, i32 0, i32 1		; <%1*> [#uses=1]
-	%18 = getelementptr %89* %9, i32 -1, i32 0		; <i32*> [#uses=1]
-	%19 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 32		; <i8*> [#uses=1]
-	%20 = bitcast i8* %19 to i32*		; <i32*> [#uses=1]
-	%21 = load i32* %20		; <i32> [#uses=1]
-	store i32 %21, i32* %18
-	%22 = getelementptr %89* %9, i32 -1, i32 1		; <i32*> [#uses=1]
-	%23 = ptrtoint %1* %17 to i32		; <i32> [#uses=1]
-	store i32 %23, i32* %22
-	%24 = getelementptr %89* %9, i32 -1, i32 2		; <i32*> [#uses=1]
-	%25 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 24		; <i8*> [#uses=1]
-	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
-	%27 = load i32* %26		; <i32> [#uses=1]
-	store i32 %27, i32* %24
-	%28 = getelementptr %89* %9, i32 -1, i32 3		; <i32*> [#uses=1]
-	%29 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 16		; <i8*> [#uses=1]
-	%30 = bitcast i8* %29 to i32*		; <i32*> [#uses=1]
-	%31 = load i32* %30		; <i32> [#uses=1]
-	store i32 %31, i32* %28
-	%32 = getelementptr %89* %9, i32 -1, i32 4		; <i32*> [#uses=1]
-	%33 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 20		; <i8*> [#uses=1]
-	%34 = bitcast i8* %33 to i32*		; <i32*> [#uses=1]
-	%35 = load i32* %34		; <i32> [#uses=1]
-	store i32 %35, i32* %32
-	%36 = getelementptr %89* %9, i32 -1, i32 5		; <i32*> [#uses=1]
-	%37 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 56		; <i8*> [#uses=1]
-	%38 = bitcast i8* %37 to i32*		; <i32*> [#uses=1]
-	%39 = load i32* %38		; <i32> [#uses=1]
-	store i32 %39, i32* %36
-	ret void
-
-; <label>:40		; preds = %6
-	%41 = getelementptr %89* %9, i32 -1, i32 0		; <i32*> [#uses=1]
-	tail call void asm sideeffect "movl %ebx, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %41) nounwind
-	%42 = getelementptr %89* %9, i32 -1, i32 1		; <i32*> [#uses=1]
-	tail call void asm sideeffect "movl %esp, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %42) nounwind
-	%43 = getelementptr %89* %9, i32 -1, i32 2		; <i32*> [#uses=1]
-	tail call void asm sideeffect "movl %ebp, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %43) nounwind
-	%44 = getelementptr %89* %9, i32 -1, i32 3		; <i32*> [#uses=1]
-	tail call void asm sideeffect "movl %edi, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %44) nounwind
-	%45 = getelementptr %89* %9, i32 -1, i32 4		; <i32*> [#uses=1]
-	tail call void asm sideeffect "movl %esi, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %45) nounwind
-	%46 = getelementptr %89* %9, i32 -1, i32 5		; <i32*> [#uses=1]
-	tail call void asm sideeffect "movl $$1f, $0\0A1:", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %46) nounwind
-	ret void
-
-; <label>:47		; preds = %1
-	ret void
-}
diff --git a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
index 27f11cf..b1222d1 100644
--- a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
+++ b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -2,7 +2,7 @@
 ; radr://6772169
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10"
-	type { i32, i1 }		; type %0
+	%0 = type { i32, i1 }		; type %0
 
 declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
 
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
index ff8cf0a..3d70b58 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
@@ -2,7 +2,7 @@
 ; rdar://6781755
 ; PR3934
 
-	type { i32, i32 }		; type %0
+	%0 = type { i32, i32 }		; type %0
 
 define void @bn_sqr_comba8(i32* nocapture %r, i32* %a) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
deleted file mode 100644
index 0a2fcdb..0000000
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
-; PR4076
-
-	type { i8, i8, i8 }		; type %0
-	type { i32, i8** }		; type %1
-	type { %3* }		; type %2
-	type { %4 }		; type %3
-	type { %5 }		; type %4
-	type { %6, i32, %7 }		; type %5
-	type { i8* }		; type %6
-	type { i32, [12 x i8] }		; type %7
-	type { %9 }		; type %8
-	type { %10, %11*, i8 }		; type %9
-	type { %11* }		; type %10
-	type { i32, %6, i8*, %12, %13*, i8, i32, %28, %29, i32, %30, i32, i32, i32, i8*, i8*, i8, i8 }		; type %11
-	type { %13* }		; type %12
-	type { %14, i32, %13*, %21 }		; type %13
-	type { %15, %16 }		; type %14
-	type { i32 (...)** }		; type %15
-	type { %17, i8* (i32)*, void (i8*)*, i8 }		; type %16
-	type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %18 }		; type %17
-	type { %19* }		; type %18
-	type { i32, %20**, i32, %20**, i8** }		; type %19
-	type { i32 (...)**, i32 }		; type %20
-	type { %22, %25*, i8, i8, %17*, %26*, %27*, %27* }		; type %21
-	type { i32 (...)**, i32, i32, i32, i32, i32, %23*, %24, [8 x %24], i32, %24*, %18 }		; type %22
-	type { %23*, void (i32, %22*, i32)*, i32, i32 }		; type %23
-	type { i8*, i32 }		; type %24
-	type { i32 (...)**, %21 }		; type %25
-	type { %20, i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }		; type %26
-	type { %20 }		; type %27
-	type { void (%9*)*, i32 }		; type %28
-	type { %15* }		; type %29
-	type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }		; type %30
-@AtomicOps_Internalx86CPUFeatures = external global %0		; <%0*> [#uses=1]
-internal constant [19 x i8] c"xxxxxxxxxxxxxxxxxx\00"		; <[19 x i8]*>:0 [#uses=1]
-internal constant [47 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00"		; <[47 x i8]*>:1 [#uses=1]
-
-define i8** @func6(i8 zeroext, i32, i32, %1*) nounwind {
-; <label>:4
-	%5 = alloca i32, align 4		; <i32*> [#uses=2]
-	%6 = alloca i32, align 4		; <i32*> [#uses=2]
-	%7 = alloca %2, align 8		; <%2*> [#uses=3]
-	%8 = alloca %8, align 8		; <%8*> [#uses=2]
-	br label %17
-
-; <label>:9		; preds = %17
-	%10 = getelementptr %1* %3, i32 %19, i32 0		; <i32*> [#uses=1]
-	%11 = load i32* %10, align 4		; <i32> [#uses=1]
-	%12 = icmp eq i32 %11, %2		; <i1> [#uses=1]
-	br i1 %12, label %13, label %16
-
-; <label>:13		; preds = %9
-	%14 = getelementptr %1* %3, i32 %19, i32 1		; <i8***> [#uses=1]
-	%15 = load i8*** %14, align 4		; <i8**> [#uses=1]
-	ret i8** %15
-
-; <label>:16		; preds = %9
-	%indvar.next13 = add i32 %18, 1		; <i32> [#uses=1]
-	br label %17
-
-; <label>:17		; preds = %16, %4
-	%18 = phi i32 [ 0, %4 ], [ %indvar.next13, %16 ]		; <i32> [#uses=2]
-	%19 = add i32 %18, %1		; <i32> [#uses=3]
-	%20 = icmp sgt i32 %19, 3		; <i1> [#uses=1]
-	br i1 %20, label %21, label %9
-
-; <label>:21		; preds = %17
-	call void @func5()
-	%22 = getelementptr %1* %3, i32 0, i32 0		; <i32*> [#uses=1]
-	%23 = load i32* %22, align 4		; <i32> [#uses=1]
-	%24 = icmp eq i32 %23, 0		; <i1> [#uses=1]
-	br i1 %24, label %._crit_edge, label %._crit_edge1
-
-._crit_edge1:		; preds = %._crit_edge1, %21
-	%25 = phi i32 [ 0, %21 ], [ %26, %._crit_edge1 ]		; <i32> [#uses=1]
-	%26 = add i32 %25, 1		; <i32> [#uses=4]
-	%27 = getelementptr %1* %3, i32 %26, i32 0		; <i32*> [#uses=1]
-	%28 = load i32* %27, align 4		; <i32> [#uses=1]
-	%29 = icmp ne i32 %28, 0		; <i1> [#uses=1]
-	%30 = icmp ne i32 %26, 4		; <i1> [#uses=1]
-	%31 = and i1 %29, %30		; <i1> [#uses=1]
-	br i1 %31, label %._crit_edge1, label %._crit_edge
-
-._crit_edge:		; preds = %._crit_edge1, %21
-	%32 = phi i32 [ 0, %21 ], [ %26, %._crit_edge1 ]		; <i32> [#uses=3]
-	%33 = call i8* @pthread_getspecific(i32 0) nounwind		; <i8*> [#uses=2]
-	%34 = icmp ne i8* %33, null		; <i1> [#uses=1]
-	%35 = icmp eq i8 %0, 0		; <i1> [#uses=1]
-	%36 = or i1 %34, %35		; <i1> [#uses=1]
-	br i1 %36, label %._crit_edge4, label %37
-
-; <label>:37		; preds = %._crit_edge
-	%38 = call i8* @func2(i32 2048)		; <i8*> [#uses=4]
-	call void @llvm.memset.i32(i8* %38, i8 0, i32 2048, i32 4)
-	%39 = call i32 @pthread_setspecific(i32 0, i8* %38) nounwind		; <i32> [#uses=2]
-	store i32 %39, i32* %5
-	store i32 0, i32* %6
-	%40 = icmp eq i32 %39, 0		; <i1> [#uses=1]
-	br i1 %40, label %41, label %43
-
-; <label>:41		; preds = %37
-	%42 = getelementptr %2* %7, i32 0, i32 0		; <%3**> [#uses=1]
-	store %3* null, %3** %42, align 8
-	br label %._crit_edge4
-
-; <label>:43		; preds = %37
-	%44 = call %3* @func1(i32* %5, i32* %6, i8* getelementptr ([47 x i8]* @1, i32 0, i32 0))		; <%3*> [#uses=2]
-	%45 = getelementptr %2* %7, i32 0, i32 0		; <%3**> [#uses=1]
-	store %3* %44, %3** %45, align 8
-	%46 = icmp eq %3* %44, null		; <i1> [#uses=1]
-	br i1 %46, label %._crit_edge4, label %47
-
-; <label>:47		; preds = %43
-	call void @func4(%8* %8, i8* getelementptr ([19 x i8]* @0, i32 0, i32 0), i32 165, %2* %7)
-	call void @func3(%8* %8) noreturn
-	unreachable
-
-._crit_edge4:		; preds = %43, %41, %._crit_edge
-	%48 = phi i8* [ %38, %41 ], [ %33, %._crit_edge ], [ %38, %43 ]		; <i8*> [#uses=2]
-	%49 = bitcast i8* %48 to i8**		; <i8**> [#uses=3]
-	%50 = icmp ne i8* %48, null		; <i1> [#uses=1]
-	%51 = icmp slt i32 %32, 4		; <i1> [#uses=1]
-	%52 = and i1 %50, %51		; <i1> [#uses=1]
-	br i1 %52, label %53, label %._crit_edge6
-
-; <label>:53		; preds = %._crit_edge4
-	%54 = getelementptr %1* %3, i32 %32, i32 0		; <i32*> [#uses=1]
-	%55 = call i32 asm sideeffect "lock; cmpxchgl $1,$2", "={ax},q,*m,0,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %2, i32* %54, i32 0) nounwind		; <i32> [#uses=1]
-	%56 = load i8* getelementptr (%0* @AtomicOps_Internalx86CPUFeatures, i32 0, i32 0), align 8		; <i8> [#uses=1]
-	%57 = icmp eq i8 %56, 0		; <i1> [#uses=1]
-	br i1 %57, label %._crit_edge7, label %58
-
-; <label>:58		; preds = %53
-	call void asm sideeffect "lfence", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind
-	br label %._crit_edge7
-
-._crit_edge7:		; preds = %58, %53
-	%59 = icmp eq i32 %55, 0		; <i1> [#uses=1]
-	br i1 %59, label %60, label %._crit_edge6
-
-._crit_edge6:		; preds = %._crit_edge7, %._crit_edge4
-	ret i8** %49
-
-; <label>:60		; preds = %._crit_edge7
-	%61 = getelementptr %1* %3, i32 %32, i32 1		; <i8***> [#uses=1]
-	store i8** %49, i8*** %61, align 4
-	ret i8** %49
-}
-
-declare %3* @func1(i32* nocapture, i32* nocapture, i8*)
-
-declare void @func5()
-
-declare void @func4(%8*, i8*, i32, %2*)
-
-declare void @func3(%8*) noreturn
-
-declare i8* @pthread_getspecific(i32) nounwind
-
-declare i8* @func2(i32)
-
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
-
-declare i32 @pthread_setspecific(i32, i8*) nounwind
diff --git a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
index 6843723..2fbf7aa 100644
--- a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -1,33 +1,33 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10
 ; rdar://6837009
 
-	type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 }		; type %0
-	type { %2 }		; type %1
-	type { %struct.pf_addr, %struct.pf_addr }		; type %2
-	type { %struct.in6_addr }		; type %3
-	type { [4 x i32] }		; type %4
-	type { %struct.pfi_dynaddr*, [4 x i8] }		; type %5
-	type { %struct.pfi_dynaddr*, %struct.pfi_dynaddr** }		; type %6
-	type { %struct.pfr_ktable*, %struct.pfr_ktable*, %struct.pfr_ktable*, i32 }		; type %7
-	type { %struct.pfr_ktable* }		; type %8
-	type { i8* }		; type %9
-	type { %11 }		; type %10
-	type { i8*, i8*, %struct.radix_node* }		; type %11
-	type { [2 x %struct.pf_rulequeue], %13, %13 }		; type %12
-	type { %struct.pf_rulequeue*, %struct.pf_rule**, i32, i32, i32 }		; type %13
-	type { %struct.pf_anchor*, %struct.pf_anchor*, %struct.pf_anchor*, i32 }		; type %14
-	type { %struct.pfi_kif*, %struct.pfi_kif*, %struct.pfi_kif*, i32 }		; type %15
-	type { %struct.ifnet*, %struct.ifnet** }		; type %16
-	type { %18 }		; type %17
-	type { %struct.pkthdr, %19 }		; type %18
-	type { %struct.m_ext, [176 x i8] }		; type %19
-	type { %struct.ifmultiaddr*, %struct.ifmultiaddr** }		; type %20
-	type { i32, %22 }		; type %21
-	type { i8*, [4 x i8] }		; type %22
-	type { %struct.tcphdr* }		; type %23
-	type { %struct.pf_ike_state }		; type %24
-	type { %struct.pf_state_key*, %struct.pf_state_key*, %struct.pf_state_key*, i32 }		; type %25
-	type { %struct.pf_src_node*, %struct.pf_src_node*, %struct.pf_src_node*, i32 }		; type %26
+	%0 = type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 }
+	%1 = type { %2 }
+	%2 = type { %struct.pf_addr, %struct.pf_addr }
+	%3 = type { %struct.in6_addr }
+	%4 = type { [4 x i32] }
+	%5 = type { %struct.pfi_dynaddr*, [4 x i8] }
+	%6 = type { %struct.pfi_dynaddr*, %struct.pfi_dynaddr** }
+	%7 = type { %struct.pfr_ktable*, %struct.pfr_ktable*, %struct.pfr_ktable*, i32 }
+	%8 = type { %struct.pfr_ktable* }
+	%9 = type { i8* }
+	%10 = type { %11 }
+	%11 = type { i8*, i8*, %struct.radix_node* }
+	%12 = type { [2 x %struct.pf_rulequeue], %13, %13 }
+	%13 = type { %struct.pf_rulequeue*, %struct.pf_rule**, i32, i32, i32 }
+	%14 = type { %struct.pf_anchor*, %struct.pf_anchor*, %struct.pf_anchor*, i32 }
+	%15 = type { %struct.pfi_kif*, %struct.pfi_kif*, %struct.pfi_kif*, i32 }
+	%16 = type { %struct.ifnet*, %struct.ifnet** }
+	%17 = type { %18 }
+	%18 = type { %struct.pkthdr, %19 }
+	%19 = type { %struct.m_ext, [176 x i8] }
+	%20 = type { %struct.ifmultiaddr*, %struct.ifmultiaddr** }
+	%21 = type { i32, %22 }
+	%22 = type { i8*, [4 x i8] }
+	%23 = type { %struct.tcphdr* }
+	%24 = type { %struct.pf_ike_state }
+	%25 = type { %struct.pf_state_key*, %struct.pf_state_key*, %struct.pf_state_key*, i32 }
+	%26 = type { %struct.pf_src_node*, %struct.pf_src_node*, %struct.pf_src_node*, i32 }
 	%struct.anon = type { %struct.pf_state*, %struct.pf_state** }
 	%struct.au_mask_t = type { i32, i32 }
 	%struct.bpf_if = type opaque
diff --git a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
index d1f9cf8..e803d6b 100644
--- a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
+++ b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
 ; PR4099
 
-	type { [62 x %struct.Bitvec*] }		; type %0
-	type { i8* }		; type %1
-	type { double }		; type %2
+	%0 = type { [62 x %struct.Bitvec*] }		; type %0
+	%1 = type { i8* }		; type %1
+	%2 = type { double }		; type %2
 	%struct..5sPragmaType = type { i8*, i32 }
 	%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
 	%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
index 9415732..3dcc0d4 100644
--- a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=x86
 
-	type { %struct.GAP }		; type %0
-	type { i16, i8, i8 }		; type %1
-	type { [2 x i32], [2 x i32] }		; type %2
-	type { %struct.rec* }		; type %3
+	%0 = type { %struct.GAP }		; type %0
+	%1 = type { i16, i8, i8 }		; type %1
+	%2 = type { [2 x i32], [2 x i32] }		; type %2
+	%3 = type { %struct.rec* }		; type %3
 	%struct.FILE_POS = type { i8, i8, i16, i32 }
 	%struct.FIRST_UNION = type { %struct.FILE_POS }
 	%struct.FOURTH_UNION = type { %struct.STYLE }
diff --git a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
index b329c91..2080c0a 100644
--- a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
+++ b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -87,8 +87,6 @@ for.inc:		; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
 	br label %for.inc
 }
 
-declare i32 @safe()
-
 define i32 @func_35(i8 signext %p_35) nounwind readonly {
 entry:
   %tobool = icmp eq i8 %p_35, 0                   ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
index 6b0d6d9..bf668e3 100644
--- a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
+++ b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -2,8 +2,8 @@
 target triple = "x86_64-mingw"
 
 ; ModuleID = 'mm.bc'
-	type opaque		; type %0
-	type opaque		; type %1
+	%0 = type opaque		; type %0
+	%1 = type opaque		; type %1
 
 define internal fastcc float @computeMipmappingRho(%0* %shaderExecutionStatePtr, i32 %index, <4 x float> %texCoord, <4 x float> %texCoordDX, <4 x float> %texCoordDY) readonly {
 indexCheckBlock:
diff --git a/test/CodeGen/X86/2009-09-19-earlyclobber.ll b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
index 4f44cae..66f5118 100644
--- a/test/CodeGen/X86/2009-09-19-earlyclobber.ll
+++ b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
@@ -4,7 +4,7 @@
 ; Registers other than RAX, RCX are OK, but they must be different.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
-	type { i64, i64 }		; type %0
+	%0 = type { i64, i64 }		; type %0
 
 define i64 @flsst(i64 %find) nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
index d33f93e..94075e7 100644
--- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -16,7 +16,7 @@ bb1:
 ; CHECK: LBB0_1:
 ; CHECK: movaps %xmm0, (%rsp)
   %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
-  call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false)
   %tmp3 = add i32 %tmp2, 1
   %tmp4 = icmp eq i32 %tmp3, %count
   br i1 %tmp4, label %bb2, label %bb1
@@ -25,4 +25,4 @@ bb2:
   ret void
 }
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll b/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
index 0532375..114b985 100644
--- a/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
+++ b/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86-64 -stress-sched | FileCheck %s
-; REQUIRES: Asserts
+; REQUIRES: asserts
 ; Test interference between physreg aliases during preRAsched.
 ; mul wants an operand in AL, but call clobbers it.
 
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll
index 0a72c1c..a7540aa 100644
--- a/test/CodeGen/X86/bswap.ll
+++ b/test/CodeGen/X86/bswap.ll
@@ -1,8 +1,6 @@
 ; bswap should be constant folded when it is passed a constant argument
 
-; RUN: llc < %s -march=x86 | \
-; RUN:   grep bswapl | count 3
-; RUN: llc < %s -march=x86 | grep rolw | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 declare i16 @llvm.bswap.i16(i16)
 
@@ -11,17 +9,51 @@ declare i32 @llvm.bswap.i32(i32)
 declare i64 @llvm.bswap.i64(i64)
 
 define i16 @W(i16 %A) {
+; CHECK: W:
+; CHECK: rolw $8, %ax
         %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
         ret i16 %Z
 }
 
 define i32 @X(i32 %A) {
+; CHECK: X:
+; CHECK: bswapl %eax
         %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
         ret i32 %Z
 }
 
 define i64 @Y(i64 %A) {
+; CHECK: Y:
+; CHECK: bswapl %eax
+; CHECK: bswapl %edx
         %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
         ret i64 %Z
 }
 
+; rdar://9164521
+define i32 @test1(i32 %a) nounwind readnone {
+entry:
+; CHECK: test1
+; CHECK: bswapl %eax
+; CHECK: shrl $16, %eax
+  %and = lshr i32 %a, 8
+  %shr3 = and i32 %and, 255
+  %and2 = shl i32 %a, 8
+  %shl = and i32 %and2, 65280
+  %or = or i32 %shr3, %shl
+  ret i32 %or
+}
+
+define i32 @test2(i32 %a) nounwind readnone {
+entry:
+; CHECK: test2
+; CHECK: bswapl %eax
+; CHECK: sarl $16, %eax
+  %and = lshr i32 %a, 8
+  %shr4 = and i32 %and, 255
+  %and2 = shl i32 %a, 8
+  %or = or i32 %shr4, %and2
+  %sext = shl i32 %or, 16
+  %conv3 = ashr exact i32 %sext, 16
+  ret i32 %conv3
+}
diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll
index 976db64..3f1fec1 100644
--- a/test/CodeGen/X86/coalescer-cross.ll
+++ b/test/CodeGen/X86/coalescer-cross.ll
@@ -5,8 +5,8 @@
 ; CHECK: os_clock
 ; CHECK-NOT: movaps
 
-	type { %struct.TValue }		; type %0
-	type { %struct.L_Umaxalign, i32, %struct.Node* }		; type %1
+	%0 = type { %struct.TValue }		; type %0
+	%1 = type { %struct.L_Umaxalign, i32, %struct.Node* }		; type %1
 	%struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
 	%struct.GCObject = type { %struct.lua_State }
 	%struct.L_Umaxalign = type { double }
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
index a9573cf..3099526 100644
--- a/test/CodeGen/X86/darwin-bzero.ll
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep __bzero
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @foo(i8* %p, i32 %len) {
-  call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1)
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 %len, i32 1, i1 false)
   ret void
 }
diff --git a/test/CodeGen/X86/fast-isel-bail.ll b/test/CodeGen/X86/fast-isel-bail.ll
index 9072c5c..a485827 100644
--- a/test/CodeGen/X86/fast-isel-bail.ll
+++ b/test/CodeGen/X86/fast-isel-bail.ll
@@ -3,7 +3,7 @@
 ; This file is for regression tests for cases where FastISel needs
 ; to gracefully bail out and let SelectionDAGISel take over.
 
-	type { i64, i8* }		; type %0
+	%0 = type { i64, i8* }		; type %0
 
 declare void @bar(%0)
 
diff --git a/test/CodeGen/X86/fold-sext-trunc.ll b/test/CodeGen/X86/fold-sext-trunc.ll
index 2605123..b453310 100644
--- a/test/CodeGen/X86/fold-sext-trunc.ll
+++ b/test/CodeGen/X86/fold-sext-trunc.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86-64 | grep movslq | count 1
 ; PR4050
 
-	type { i64 }		; type %0
+	%0 = type { i64 }		; type %0
 	%struct.S1 = type { i16, i32 }
 @g_10 = external global %struct.S1		; <%struct.S1*> [#uses=2]
 
diff --git a/test/CodeGen/X86/fp-stack-2results.ll b/test/CodeGen/X86/fp-stack-2results.ll
index 321e267..e986e36 100644
--- a/test/CodeGen/X86/fp-stack-2results.ll
+++ b/test/CodeGen/X86/fp-stack-2results.ll
@@ -1,12 +1,16 @@
 ; RUN: llc < %s -march=x86 | grep fldz
 ; RUN: llc < %s -march=x86-64 | grep fld1
 
+%0 = type { x86_fp80, x86_fp80 }
+
 ; This is basically this code on x86-64:
 ; _Complex long double test() { return 1.0; }
 define {x86_fp80, x86_fp80} @test() {
   %A = fpext double 1.0 to x86_fp80
   %B = fpext double 0.0 to x86_fp80
-  ret x86_fp80 %A, x86_fp80 %B
+  %mrv = insertvalue %0 undef, x86_fp80 %A, 0
+  %mrv1 = insertvalue %0 %mrv, x86_fp80 %B, 1
+  ret %0 %mrv1
 }
 
 
@@ -16,16 +20,18 @@ define {x86_fp80, x86_fp80} @test() {
 ;	ret
 define {x86_fp80, x86_fp80} @test2() {
   %A = fpext double 1.0 to x86_fp80
-  ret x86_fp80 %A, x86_fp80 %A
+  %mrv = insertvalue %0 undef, x86_fp80 %A, 0
+  %mrv1 = insertvalue %0 %mrv, x86_fp80 %A, 1
+  ret %0 %mrv1
 }
 
 ; Uses both values.
 define void @call1(x86_fp80 *%P1, x86_fp80 *%P2) {
   %a = call {x86_fp80,x86_fp80} @test()
-  %b = getresult {x86_fp80,x86_fp80} %a, 0
+  %b = extractvalue {x86_fp80,x86_fp80} %a, 0
   store x86_fp80 %b, x86_fp80* %P1
 
-  %c = getresult {x86_fp80,x86_fp80} %a, 1
+  %c = extractvalue {x86_fp80,x86_fp80} %a, 1
   store x86_fp80 %c, x86_fp80* %P2
   ret void 
 }
@@ -33,10 +39,10 @@ define void @call1(x86_fp80 *%P1, x86_fp80 *%P2) {
 ; Uses both values, requires fxch
 define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) {
   %a = call {x86_fp80,x86_fp80} @test()
-  %b = getresult {x86_fp80,x86_fp80} %a, 1
+  %b = extractvalue {x86_fp80,x86_fp80} %a, 1
   store x86_fp80 %b, x86_fp80* %P1
 
-  %c = getresult {x86_fp80,x86_fp80} %a, 0
+  %c = extractvalue {x86_fp80,x86_fp80} %a, 0
   store x86_fp80 %c, x86_fp80* %P2
   ret void
 }
@@ -44,7 +50,7 @@ define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) {
 ; Uses ST(0), ST(1) is dead but must be popped.
 define void @call3(x86_fp80 *%P1, x86_fp80 *%P2) {
   %a = call {x86_fp80,x86_fp80} @test()
-  %b = getresult {x86_fp80,x86_fp80} %a, 0
+  %b = extractvalue {x86_fp80,x86_fp80} %a, 0
   store x86_fp80 %b, x86_fp80* %P1
   ret void 
 }
@@ -53,7 +59,7 @@ define void @call3(x86_fp80 *%P1, x86_fp80 *%P2) {
 define void @call4(x86_fp80 *%P1, x86_fp80 *%P2) {
   %a = call {x86_fp80,x86_fp80} @test()
 
-  %c = getresult {x86_fp80,x86_fp80} %a, 1
+  %c = extractvalue {x86_fp80,x86_fp80} %a, 1
   store x86_fp80 %c, x86_fp80* %P2
   ret void 
 }
diff --git a/test/CodeGen/X86/inline-asm-mrv.ll b/test/CodeGen/X86/inline-asm-mrv.ll
index 78d7e77..733205d 100644
--- a/test/CodeGen/X86/inline-asm-mrv.ll
+++ b/test/CodeGen/X86/inline-asm-mrv.ll
@@ -11,7 +11,7 @@ define i32 @test1(i8* %v, i8* %blk2, i8* %blk1, i32 %stride, i32 %h) nounwind  {
 	%tmp12 = sext i32 %stride to i64		; <i64> [#uses=1]
 	%mrv = call {i32, i8*, i8*} asm sideeffect "$0 $1 $2 $3 $4 $5 $6",
          "=r,=r,=r,r,r,r,r"( i64 %tmp12, i32 %h, i8* %blk1, i8* %blk2 ) nounwind
-        %tmp6 = getresult {i32, i8*, i8*} %mrv, 0
+        %tmp6 = extractvalue {i32, i8*, i8*} %mrv, 0
 	%tmp7 = call i32 asm sideeffect "set $0",
              "=r,~{dirflag},~{fpsr},~{flags}"( ) nounwind
 	ret i32 %tmp7
@@ -19,16 +19,16 @@ define i32 @test1(i8* %v, i8* %blk2, i8* %blk1, i32 %stride, i32 %h) nounwind  {
 
 define <4 x float> @test2() nounwind {
 	%mrv = call {<4 x float>, <4 x float>} asm "set $0, $1", "=x,=x"()
-	%a = getresult {<4 x float>, <4 x float>} %mrv, 0
-	%b = getresult {<4 x float>, <4 x float>} %mrv, 1
+	%a = extractvalue {<4 x float>, <4 x float>} %mrv, 0
+	%b = extractvalue {<4 x float>, <4 x float>} %mrv, 1
 	%c = fadd <4 x float> %a, %b
 	ret <4 x float> %c
 }
 
 define <4 x i32> @test3() nounwind {
 	%mrv = call {<4 x i32>, <4 x i32>} asm "set $0, $1", "=x,=x"()
-	%a = getresult {<4 x i32>, <4 x i32>} %mrv, 0
-	%b = getresult {<4 x i32>, <4 x i32>} %mrv, 1
+	%a = extractvalue {<4 x i32>, <4 x i32>} %mrv, 0
+	%b = extractvalue {<4 x i32>, <4 x i32>} %mrv, 1
 	%c = add <4 x i32> %a, %b
 	ret <4 x i32> %c
 }
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
index ab44206..321fd30 100644
--- a/test/CodeGen/X86/inline-asm-q-regs.ll
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86-64
 ; rdar://7066579
 
-	type { i64, i64, i64, i64, i64 }		; type %0
+	%0 = type { i64, i64, i64, i64, i64 }		; type %0
 
 define void @t() nounwind {
 entry:
diff --git a/test/CodeGen/X86/mem-promote-integers.ll b/test/CodeGen/X86/mem-promote-integers.ll
new file mode 100644
index 0000000..80103d1
--- /dev/null
+++ b/test/CodeGen/X86/mem-promote-integers.ll
@@ -0,0 +1,391 @@
+; Test the basic functionality of integer element promotions of different types.
+; This tests checks passing of arguments, loading and storing to memory and
+; basic arithmetic.
+; RUN: llc -march=x86 -promote-elements < %s
+; RUN: llc -march=x86-64 -promote-elements < %s
+
+define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) {
+  %bb = load <1 x i8>* %b
+  %tt = xor <1 x i8> %x, %bb
+  store <1 x i8> %tt, <1 x i8>* %b
+  br label %next
+
+next:
+  ret <1 x i8> %tt
+}
+
+
+define <1 x i16> @test_1xi16(<1 x i16> %x, <1 x i16>* %b) {
+  %bb = load <1 x i16>* %b
+  %tt = xor <1 x i16> %x, %bb
+  store <1 x i16> %tt, <1 x i16>* %b
+  br label %next
+
+next:
+  ret <1 x i16> %tt
+}
+
+
+define <1 x i32> @test_1xi32(<1 x i32> %x, <1 x i32>* %b) {
+  %bb = load <1 x i32>* %b
+  %tt = xor <1 x i32> %x, %bb
+  store <1 x i32> %tt, <1 x i32>* %b
+  br label %next
+
+next:
+  ret <1 x i32> %tt
+}
+
+
+define <1 x i64> @test_1xi64(<1 x i64> %x, <1 x i64>* %b) {
+  %bb = load <1 x i64>* %b
+  %tt = xor <1 x i64> %x, %bb
+  store <1 x i64> %tt, <1 x i64>* %b
+  br label %next
+
+next:
+  ret <1 x i64> %tt
+}
+
+
+define <1 x i128> @test_1xi128(<1 x i128> %x, <1 x i128>* %b) {
+  %bb = load <1 x i128>* %b
+  %tt = xor <1 x i128> %x, %bb
+  store <1 x i128> %tt, <1 x i128>* %b
+  br label %next
+
+next:
+  ret <1 x i128> %tt
+}
+
+
+define <1 x i256> @test_1xi256(<1 x i256> %x, <1 x i256>* %b) {
+  %bb = load <1 x i256>* %b
+  %tt = xor <1 x i256> %x, %bb
+  store <1 x i256> %tt, <1 x i256>* %b
+  br label %next
+
+next:
+  ret <1 x i256> %tt
+}
+
+
+define <1 x i512> @test_1xi512(<1 x i512> %x, <1 x i512>* %b) {
+  %bb = load <1 x i512>* %b
+  %tt = xor <1 x i512> %x, %bb
+  store <1 x i512> %tt, <1 x i512>* %b
+  br label %next
+
+next:
+  ret <1 x i512> %tt
+}
+
+
+define <2 x i8> @test_2xi8(<2 x i8> %x, <2 x i8>* %b) {
+  %bb = load <2 x i8>* %b
+  %tt = xor <2 x i8> %x, %bb
+  store <2 x i8> %tt, <2 x i8>* %b
+  br label %next
+
+next:
+  ret <2 x i8> %tt
+}
+
+
+define <2 x i16> @test_2xi16(<2 x i16> %x, <2 x i16>* %b) {
+  %bb = load <2 x i16>* %b
+  %tt = xor <2 x i16> %x, %bb
+  store <2 x i16> %tt, <2 x i16>* %b
+  br label %next
+
+next:
+  ret <2 x i16> %tt
+}
+
+
+define <2 x i32> @test_2xi32(<2 x i32> %x, <2 x i32>* %b) {
+  %bb = load <2 x i32>* %b
+  %tt = xor <2 x i32> %x, %bb
+  store <2 x i32> %tt, <2 x i32>* %b
+  br label %next
+
+next:
+  ret <2 x i32> %tt
+}
+
+
+define <2 x i64> @test_2xi64(<2 x i64> %x, <2 x i64>* %b) {
+  %bb = load <2 x i64>* %b
+  %tt = xor <2 x i64> %x, %bb
+  store <2 x i64> %tt, <2 x i64>* %b
+  br label %next
+
+next:
+  ret <2 x i64> %tt
+}
+
+
+define <2 x i128> @test_2xi128(<2 x i128> %x, <2 x i128>* %b) {
+  %bb = load <2 x i128>* %b
+  %tt = xor <2 x i128> %x, %bb
+  store <2 x i128> %tt, <2 x i128>* %b
+  br label %next
+
+next:
+  ret <2 x i128> %tt
+}
+
+
+define <2 x i256> @test_2xi256(<2 x i256> %x, <2 x i256>* %b) {
+  %bb = load <2 x i256>* %b
+  %tt = xor <2 x i256> %x, %bb
+  store <2 x i256> %tt, <2 x i256>* %b
+  br label %next
+
+next:
+  ret <2 x i256> %tt
+}
+
+
+define <2 x i512> @test_2xi512(<2 x i512> %x, <2 x i512>* %b) {
+  %bb = load <2 x i512>* %b
+  %tt = xor <2 x i512> %x, %bb
+  store <2 x i512> %tt, <2 x i512>* %b
+  br label %next
+
+next:
+  ret <2 x i512> %tt
+}
+
+
+define <3 x i8> @test_3xi8(<3 x i8> %x, <3 x i8>* %b) {
+  %bb = load <3 x i8>* %b
+  %tt = xor <3 x i8> %x, %bb
+  store <3 x i8> %tt, <3 x i8>* %b
+  br label %next
+
+next:
+  ret <3 x i8> %tt
+}
+
+
+define <3 x i16> @test_3xi16(<3 x i16> %x, <3 x i16>* %b) {
+  %bb = load <3 x i16>* %b
+  %tt = xor <3 x i16> %x, %bb
+  store <3 x i16> %tt, <3 x i16>* %b
+  br label %next
+
+next:
+  ret <3 x i16> %tt
+}
+
+
+define <3 x i32> @test_3xi32(<3 x i32> %x, <3 x i32>* %b) {
+  %bb = load <3 x i32>* %b
+  %tt = xor <3 x i32> %x, %bb
+  store <3 x i32> %tt, <3 x i32>* %b
+  br label %next
+
+next:
+  ret <3 x i32> %tt
+}
+
+
+define <3 x i64> @test_3xi64(<3 x i64> %x, <3 x i64>* %b) {
+  %bb = load <3 x i64>* %b
+  %tt = xor <3 x i64> %x, %bb
+  store <3 x i64> %tt, <3 x i64>* %b
+  br label %next
+
+next:
+  ret <3 x i64> %tt
+}
+
+
+define <3 x i128> @test_3xi128(<3 x i128> %x, <3 x i128>* %b) {
+  %bb = load <3 x i128>* %b
+  %tt = xor <3 x i128> %x, %bb
+  store <3 x i128> %tt, <3 x i128>* %b
+  br label %next
+
+next:
+  ret <3 x i128> %tt
+}
+
+
+define <3 x i256> @test_3xi256(<3 x i256> %x, <3 x i256>* %b) {
+  %bb = load <3 x i256>* %b
+  %tt = xor <3 x i256> %x, %bb
+  store <3 x i256> %tt, <3 x i256>* %b
+  br label %next
+
+next:
+  ret <3 x i256> %tt
+}
+
+
+define <3 x i512> @test_3xi512(<3 x i512> %x, <3 x i512>* %b) {
+  %bb = load <3 x i512>* %b
+  %tt = xor <3 x i512> %x, %bb
+  store <3 x i512> %tt, <3 x i512>* %b
+  br label %next
+
+next:
+  ret <3 x i512> %tt
+}
+
+
+define <4 x i8> @test_4xi8(<4 x i8> %x, <4 x i8>* %b) {
+  %bb = load <4 x i8>* %b
+  %tt = xor <4 x i8> %x, %bb
+  store <4 x i8> %tt, <4 x i8>* %b
+  br label %next
+
+next:
+  ret <4 x i8> %tt
+}
+
+
+define <4 x i16> @test_4xi16(<4 x i16> %x, <4 x i16>* %b) {
+  %bb = load <4 x i16>* %b
+  %tt = xor <4 x i16> %x, %bb
+  store <4 x i16> %tt, <4 x i16>* %b
+  br label %next
+
+next:
+  ret <4 x i16> %tt
+}
+
+
+define <4 x i32> @test_4xi32(<4 x i32> %x, <4 x i32>* %b) {
+  %bb = load <4 x i32>* %b
+  %tt = xor <4 x i32> %x, %bb
+  store <4 x i32> %tt, <4 x i32>* %b
+  br label %next
+
+next:
+  ret <4 x i32> %tt
+}
+
+
+define <4 x i64> @test_4xi64(<4 x i64> %x, <4 x i64>* %b) {
+  %bb = load <4 x i64>* %b
+  %tt = xor <4 x i64> %x, %bb
+  store <4 x i64> %tt, <4 x i64>* %b
+  br label %next
+
+next:
+  ret <4 x i64> %tt
+}
+
+
+define <4 x i128> @test_4xi128(<4 x i128> %x, <4 x i128>* %b) {
+  %bb = load <4 x i128>* %b
+  %tt = xor <4 x i128> %x, %bb
+  store <4 x i128> %tt, <4 x i128>* %b
+  br label %next
+
+next:
+  ret <4 x i128> %tt
+}
+
+
+define <4 x i256> @test_4xi256(<4 x i256> %x, <4 x i256>* %b) {
+  %bb = load <4 x i256>* %b
+  %tt = xor <4 x i256> %x, %bb
+  store <4 x i256> %tt, <4 x i256>* %b
+  br label %next
+
+next:
+  ret <4 x i256> %tt
+}
+
+
+define <4 x i512> @test_4xi512(<4 x i512> %x, <4 x i512>* %b) {
+  %bb = load <4 x i512>* %b
+  %tt = xor <4 x i512> %x, %bb
+  store <4 x i512> %tt, <4 x i512>* %b
+  br label %next
+
+next:
+  ret <4 x i512> %tt
+}
+
+
+define <5 x i8> @test_5xi8(<5 x i8> %x, <5 x i8>* %b) {
+  %bb = load <5 x i8>* %b
+  %tt = xor <5 x i8> %x, %bb
+  store <5 x i8> %tt, <5 x i8>* %b
+  br label %next
+
+next:
+  ret <5 x i8> %tt
+}
+
+
+define <5 x i16> @test_5xi16(<5 x i16> %x, <5 x i16>* %b) {
+  %bb = load <5 x i16>* %b
+  %tt = xor <5 x i16> %x, %bb
+  store <5 x i16> %tt, <5 x i16>* %b
+  br label %next
+
+next:
+  ret <5 x i16> %tt
+}
+
+
+define <5 x i32> @test_5xi32(<5 x i32> %x, <5 x i32>* %b) {
+  %bb = load <5 x i32>* %b
+  %tt = xor <5 x i32> %x, %bb
+  store <5 x i32> %tt, <5 x i32>* %b
+  br label %next
+
+next:
+  ret <5 x i32> %tt
+}
+
+
+define <5 x i64> @test_5xi64(<5 x i64> %x, <5 x i64>* %b) {
+  %bb = load <5 x i64>* %b
+  %tt = xor <5 x i64> %x, %bb
+  store <5 x i64> %tt, <5 x i64>* %b
+  br label %next
+
+next:
+  ret <5 x i64> %tt
+}
+
+
+define <5 x i128> @test_5xi128(<5 x i128> %x, <5 x i128>* %b) {
+  %bb = load <5 x i128>* %b
+  %tt = xor <5 x i128> %x, %bb
+  store <5 x i128> %tt, <5 x i128>* %b
+  br label %next
+
+next:
+  ret <5 x i128> %tt
+}
+
+
+define <5 x i256> @test_5xi256(<5 x i256> %x, <5 x i256>* %b) {
+  %bb = load <5 x i256>* %b
+  %tt = xor <5 x i256> %x, %bb
+  store <5 x i256> %tt, <5 x i256>* %b
+  br label %next
+
+next:
+  ret <5 x i256> %tt
+}
+
+
+define <5 x i512> @test_5xi512(<5 x i512> %x, <5 x i512>* %b) {
+  %bb = load <5 x i512>* %b
+  %tt = xor <5 x i512> %x, %bb
+  store <5 x i512> %tt, <5 x i512>* %b
+  br label %next
+
+next:
+  ret <5 x i512> %tt
+}
+
+
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 17cd8e8..eae2e70 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -38,7 +38,7 @@ entry:
 ; X86-64: movq $0
   %tmp1 = alloca [25 x i8]
   %tmp2 = bitcast [25 x i8]* %tmp1 to i8*
-  call void @llvm.memcpy.i32( i8* %tmp2, i8* getelementptr ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1 ) nounwind 
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1, i1 false)
   unreachable
 }
 
@@ -72,7 +72,7 @@ entry:
 ; X86-64: movaps %xmm0, (%rdi)
   %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
   %tmp3 = bitcast %struct.s0* %b to i8*           ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
   ret void
 }
 
@@ -115,7 +115,7 @@ entry:
 ; X86-64: movq %rax, (%rdi)
   %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
   %tmp3 = bitcast %struct.s0* %b to i8*           ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8, i1 false)
   ret void
 }
 
@@ -160,8 +160,8 @@ entry:
 ; X86-64: movl $2021161080
   %tmp1 = alloca [30 x i8]
   %tmp2 = bitcast [30 x i8]* %tmp1 to i8*
-  call void @llvm.memcpy.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1, i1 false)
   unreachable
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memmove-4.ll b/test/CodeGen/X86/memmove-4.ll
deleted file mode 100644
index 027db1f..0000000
--- a/test/CodeGen/X86/memmove-4.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s | not grep call
-
-target triple = "i686-pc-linux-gnu"
-
-define void @a(i8* %a, i8* %b) nounwind {
-        %tmp2 = bitcast i8* %a to i8*
-        %tmp3 = bitcast i8* %b to i8*
-        tail call void @llvm.memmove.i32( i8* %tmp2, i8* %tmp3, i32 12, i32 4 )
-        ret void
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 993583b..b2bd72b 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -6,7 +6,7 @@ define fastcc void @t1() nounwind {
 entry:
 ; CHECK: t1:
 ; CHECK: calll _memset
-  call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
+  call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i32 1, i1 false)
   unreachable
 }
 
@@ -14,7 +14,7 @@ define fastcc void @t2(i8 signext %c) nounwind {
 entry:
 ; CHECK: t2:
 ; CHECK: calll _memset
-  call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
+  call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i32 1, i1 false)
   unreachable
 }
 
diff --git a/test/CodeGen/X86/memset-3.ll b/test/CodeGen/X86/memset-3.ll
index 9b20ad5..29febfa 100644
--- a/test/CodeGen/X86/memset-3.ll
+++ b/test/CodeGen/X86/memset-3.ll
@@ -5,8 +5,8 @@ define void @t() nounwind ssp {
 entry:
   %buf = alloca [512 x i8], align 1
   %ptr = getelementptr inbounds [512 x i8]* %buf, i32 0, i32 0
-  call void @llvm.memset.i32(i8* %ptr, i8 undef, i32 512, i32 1)
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 undef, i32 512, i32 1, i1 false)
   unreachable
 }
 
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index cf7464d..72b3e0f 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -8,11 +8,11 @@ entry:
 	%up_mvd = alloca [8 x %struct.x]		; <[8 x %struct.x]*> [#uses=2]
 	%up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0		; <%struct.x*> [#uses=1]
 	%tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i64( i8* %tmp110117, i8 0, i64 32, i32 8 )
+	call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false)
 	call void @foo( %struct.x* %up_mvd116 ) nounwind 
 	ret void
 }
 
 declare void @foo(%struct.x*)
 
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind 
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index 3f069b4..e20fce1 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -4,9 +4,8 @@
 
 define void @bork() nounwind {
 entry:
-        call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 4 )
-        ret void
+  call void @llvm.memset.p0i8.i64(i8* null, i8 0, i64 80, i32 4, i1 false)
+  ret void
 }
 
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
-
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index 9f7501e..869f32b 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -5,12 +5,12 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i686-pc-linux-gnu"
 	%struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* }
 	%struct.QBasicAtomic = type { i32 }
-	%struct.QClipData = type { i32, "struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 }
-	"struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* }
+	%struct.QClipData = type { i32, %"struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 }
+	%"struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* }
 	%struct.QRasterBuffer = type { %struct.QRect, %struct.QRegion, %struct.QClipData*, %struct.QClipData*, i8, i32, i32, %struct.DrawHelper*, i32, i32, i32, i8* }
 	%struct.QRect = type { i32, i32, i32, i32 }
-	%struct.QRegion = type { "struct.QRegion::QRegionData"* }
-	"struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* }
+	%struct.QRegion = type { %"struct.QRegion::QRegionData"* }
+	%"struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* }
 	%struct.QRegionPrivate = type opaque
 	%struct.QT_FT_Span = type { i16, i16, i16, i8 }
 	%struct._XRegion = type opaque
diff --git a/test/CodeGen/X86/muloti.ll b/test/CodeGen/X86/muloti.ll
new file mode 100644
index 0000000..2f0986e
--- /dev/null
+++ b/test/CodeGen/X86/muloti.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+%0 = type { i64, i64 }
+%1 = type { i128, i1 }
+
+define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+; CHECK: x
+entry:
+  %tmp16 = zext i64 %a.coerce0 to i128
+  %tmp11 = zext i64 %a.coerce1 to i128
+  %tmp12 = shl nuw i128 %tmp11, 64
+  %ins14 = or i128 %tmp12, %tmp16
+  %tmp6 = zext i64 %b.coerce0 to i128
+  %tmp3 = zext i64 %b.coerce1 to i128
+  %tmp4 = shl nuw i128 %tmp3, 64
+  %ins = or i128 %tmp4, %tmp6
+  %0 = tail call %1 @llvm.smul.with.overflow.i128(i128 %ins14, i128 %ins)
+; CHECK: callq   ___muloti4
+  %1 = extractvalue %1 %0, 0
+  %2 = extractvalue %1 %0, 1
+  br i1 %2, label %overflow, label %nooverflow
+
+overflow:                                         ; preds = %entry
+  tail call void @llvm.trap()
+  unreachable
+
+nooverflow:                                       ; preds = %entry
+  %tmp20 = trunc i128 %1 to i64
+  %tmp21 = insertvalue %0 undef, i64 %tmp20, 0
+  %tmp22 = lshr i128 %1, 64
+  %tmp23 = trunc i128 %tmp22 to i64
+  %tmp24 = insertvalue %0 %tmp21, i64 %tmp23, 1
+  ret %0 %tmp24
+}
+
+define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+entry:
+; CHECK: foo
+  %retval = alloca i128, align 16
+  %coerce = alloca i128, align 16
+  %a.addr = alloca i128, align 16
+  %coerce1 = alloca i128, align 16
+  %b.addr = alloca i128, align 16
+  %0 = bitcast i128* %coerce to %0*
+  %1 = getelementptr %0* %0, i32 0, i32 0
+  store i64 %a.coerce0, i64* %1
+  %2 = getelementptr %0* %0, i32 0, i32 1
+  store i64 %a.coerce1, i64* %2
+  %a = load i128* %coerce, align 16
+  store i128 %a, i128* %a.addr, align 16
+  %3 = bitcast i128* %coerce1 to %0*
+  %4 = getelementptr %0* %3, i32 0, i32 0
+  store i64 %b.coerce0, i64* %4
+  %5 = getelementptr %0* %3, i32 0, i32 1
+  store i64 %b.coerce1, i64* %5
+  %b = load i128* %coerce1, align 16
+  store i128 %b, i128* %b.addr, align 16
+  %tmp = load i128* %a.addr, align 16
+  %tmp2 = load i128* %b.addr, align 16
+  %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2)
+; CHECK: cmov
+; CHECK: divti3
+  %7 = extractvalue %1 %6, 0
+  %8 = extractvalue %1 %6, 1
+  br i1 %8, label %overflow, label %nooverflow
+
+overflow:                                         ; preds = %entry
+  call void @llvm.trap()
+  unreachable
+
+nooverflow:                                       ; preds = %entry
+  store i128 %7, i128* %retval
+  %9 = bitcast i128* %retval to %0*
+  %10 = load %0* %9, align 1
+  ret %0 %10
+}
+
+declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone
+
+declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/X86/multiple-return-values-cross-block.ll b/test/CodeGen/X86/multiple-return-values-cross-block.ll
index e9837d0..b0cb061 100644
--- a/test/CodeGen/X86/multiple-return-values-cross-block.ll
+++ b/test/CodeGen/X86/multiple-return-values-cross-block.ll
@@ -4,12 +4,12 @@ declare {x86_fp80, x86_fp80} @test()
 
 define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) {
   %a = call {x86_fp80,x86_fp80} @test()
-  %b = getresult {x86_fp80,x86_fp80} %a, 1
+  %b = extractvalue {x86_fp80,x86_fp80} %a, 1
   store x86_fp80 %b, x86_fp80* %P1
 br label %L
 
 L:
-  %c = getresult {x86_fp80,x86_fp80} %a, 0
+  %c = extractvalue {x86_fp80,x86_fp80} %a, 0
   store x86_fp80 %c, x86_fp80* %P2
   ret void
 }
diff --git a/test/CodeGen/X86/multiple-return-values.ll b/test/CodeGen/X86/multiple-return-values.ll
deleted file mode 100644
index 018d997..0000000
--- a/test/CodeGen/X86/multiple-return-values.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=x86
-
-define {i64, float} @bar(i64 %a, float %b) {
-        %y = add i64 %a, 7
-        %z = fadd float %b, 7.0
-	ret i64 %y, float %z
-}
-
-define i64 @foo() {
-	%M = call {i64, float} @bar(i64 21, float 21.0)
-        %N = getresult {i64, float} %M, 0
-        %O = getresult {i64, float} %M, 1
-        %P = fptosi float %O to i64
-        %Q = add i64 %P, %N
-	ret i64 %Q
-}
diff --git a/test/CodeGen/X86/optimize-max-0.ll b/test/CodeGen/X86/optimize-max-0.ll
index 162c7a5..981a16a 100644
--- a/test/CodeGen/X86/optimize-max-0.ll
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -8,454 +8,454 @@ target triple = "i386-apple-darwin9"
 
 define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 entry:
-	%0 = mul i32 %x, %w		; <i32> [#uses=2]
-	%1 = mul i32 %x, %w		; <i32> [#uses=1]
-	%2 = sdiv i32 %1, 4		; <i32> [#uses=1]
-	%.sum2 = add i32 %2, %0		; <i32> [#uses=2]
-	%cond = icmp eq i32 %d, 1		; <i1> [#uses=1]
-	br i1 %cond, label %bb29, label %bb10.preheader
-
-bb10.preheader:		; preds = %entry
-	%3 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %3, label %bb.nph9, label %bb18.loopexit
-
-bb.nph7:		; preds = %bb7.preheader
-	%4 = mul i32 %y.08, %w		; <i32> [#uses=1]
-	%5 = mul i32 %y.08, %s		; <i32> [#uses=1]
-	%6 = add i32 %5, 1		; <i32> [#uses=1]
-	%tmp8 = icmp sgt i32 1, %w		; <i1> [#uses=1]
-	%smax9 = select i1 %tmp8, i32 1, i32 %w		; <i32> [#uses=1]
-	br label %bb6
-
-bb6:		; preds = %bb7, %bb.nph7
-	%x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]		; <i32> [#uses=3]
-	%7 = add i32 %x.06, %4		; <i32> [#uses=1]
-	%8 = shl i32 %x.06, 1		; <i32> [#uses=1]
-	%9 = add i32 %6, %8		; <i32> [#uses=1]
-	%10 = getelementptr i8* %r, i32 %9		; <i8*> [#uses=1]
-	%11 = load i8* %10, align 1		; <i8> [#uses=1]
-	%12 = getelementptr i8* %j, i32 %7		; <i8*> [#uses=1]
-	store i8 %11, i8* %12, align 1
-	br label %bb7
-
-bb7:		; preds = %bb6
-	%indvar.next7 = add i32 %x.06, 1		; <i32> [#uses=2]
-	%exitcond10 = icmp ne i32 %indvar.next7, %smax9		; <i1> [#uses=1]
-	br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
-
-bb7.bb9_crit_edge:		; preds = %bb7
-	br label %bb9
-
-bb9:		; preds = %bb7.preheader, %bb7.bb9_crit_edge
-	br label %bb10
-
-bb10:		; preds = %bb9
-	%indvar.next11 = add i32 %y.08, 1		; <i32> [#uses=2]
-	%exitcond12 = icmp ne i32 %indvar.next11, %x		; <i1> [#uses=1]
-	br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
-
-bb10.bb18.loopexit_crit_edge:		; preds = %bb10
-	br label %bb10.bb18.loopexit_crit_edge.split
-
-bb10.bb18.loopexit_crit_edge.split:		; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
-	br label %bb18.loopexit
-
-bb.nph9:		; preds = %bb10.preheader
-	%13 = icmp sgt i32 %w, 0		; <i1> [#uses=1]
-	br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
-
-bb.nph9.split:		; preds = %bb.nph9
-	br label %bb7.preheader
-
-bb7.preheader:		; preds = %bb.nph9.split, %bb10
-	%y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]		; <i32> [#uses=3]
-	br i1 true, label %bb.nph7, label %bb9
-
-bb.nph5:		; preds = %bb18.loopexit
-	%14 = sdiv i32 %w, 2		; <i32> [#uses=1]
-	%15 = icmp slt i32 %w, 2		; <i1> [#uses=1]
-	%16 = sdiv i32 %x, 2		; <i32> [#uses=2]
-	br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
-
-bb.nph5.split:		; preds = %bb.nph5
-	%tmp2 = icmp sgt i32 1, %16		; <i1> [#uses=1]
-	%smax3 = select i1 %tmp2, i32 1, i32 %16		; <i32> [#uses=1]
-	br label %bb13
-
-bb13:		; preds = %bb18, %bb.nph5.split
-	%y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]		; <i32> [#uses=4]
-	%17 = mul i32 %14, %y.14		; <i32> [#uses=2]
-	%18 = shl i32 %y.14, 1		; <i32> [#uses=1]
-	%19 = srem i32 %y.14, 2		; <i32> [#uses=1]
-	%20 = add i32 %19, %18		; <i32> [#uses=1]
-	%21 = mul i32 %20, %s		; <i32> [#uses=2]
-	br i1 true, label %bb.nph3, label %bb17
-
-bb.nph3:		; preds = %bb13
-	%22 = add i32 %17, %0		; <i32> [#uses=1]
-	%23 = add i32 %17, %.sum2		; <i32> [#uses=1]
-	%24 = sdiv i32 %w, 2		; <i32> [#uses=2]
-	%tmp = icmp sgt i32 1, %24		; <i1> [#uses=1]
-	%smax = select i1 %tmp, i32 1, i32 %24		; <i32> [#uses=1]
-	br label %bb14
-
-bb14:		; preds = %bb15, %bb.nph3
-	%x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]		; <i32> [#uses=5]
-	%25 = shl i32 %x.12, 2		; <i32> [#uses=1]
-	%26 = add i32 %25, %21		; <i32> [#uses=1]
-	%27 = getelementptr i8* %r, i32 %26		; <i8*> [#uses=1]
-	%28 = load i8* %27, align 1		; <i8> [#uses=1]
-	%.sum = add i32 %22, %x.12		; <i32> [#uses=1]
-	%29 = getelementptr i8* %j, i32 %.sum		; <i8*> [#uses=1]
-	store i8 %28, i8* %29, align 1
-	%30 = shl i32 %x.12, 2		; <i32> [#uses=1]
-	%31 = or i32 %30, 2		; <i32> [#uses=1]
-	%32 = add i32 %31, %21		; <i32> [#uses=1]
-	%33 = getelementptr i8* %r, i32 %32		; <i8*> [#uses=1]
-	%34 = load i8* %33, align 1		; <i8> [#uses=1]
-	%.sum6 = add i32 %23, %x.12		; <i32> [#uses=1]
-	%35 = getelementptr i8* %j, i32 %.sum6		; <i8*> [#uses=1]
-	store i8 %34, i8* %35, align 1
-	br label %bb15
-
-bb15:		; preds = %bb14
-	%indvar.next = add i32 %x.12, 1		; <i32> [#uses=2]
-	%exitcond = icmp ne i32 %indvar.next, %smax		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
-
-bb15.bb17_crit_edge:		; preds = %bb15
-	br label %bb17
-
-bb17:		; preds = %bb15.bb17_crit_edge, %bb13
-	br label %bb18
-
-bb18.loopexit:		; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
-	%36 = icmp slt i32 %x, 2		; <i1> [#uses=1]
-	br i1 %36, label %bb20, label %bb.nph5
-
-bb18:		; preds = %bb17
-	%indvar.next1 = add i32 %y.14, 1		; <i32> [#uses=2]
-	%exitcond4 = icmp ne i32 %indvar.next1, %smax3		; <i1> [#uses=1]
-	br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
-
-bb18.bb20_crit_edge:		; preds = %bb18
-	br label %bb18.bb20_crit_edge.split
-
-bb18.bb20_crit_edge.split:		; preds = %bb18.bb20_crit_edge, %bb.nph5
-	br label %bb20
-
-bb20:		; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
-	switch i32 %d, label %return [
-		i32 3, label %bb22
-		i32 1, label %bb29
-	]
-
-bb22:		; preds = %bb20
-	%37 = mul i32 %x, %w		; <i32> [#uses=1]
-	%38 = sdiv i32 %37, 4		; <i32> [#uses=1]
-	%.sum3 = add i32 %38, %.sum2		; <i32> [#uses=2]
-	%39 = add i32 %x, 15		; <i32> [#uses=1]
-	%40 = and i32 %39, -16		; <i32> [#uses=1]
-	%41 = add i32 %w, 15		; <i32> [#uses=1]
-	%42 = and i32 %41, -16		; <i32> [#uses=1]
-	%43 = mul i32 %40, %s		; <i32> [#uses=1]
-	%44 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %44, label %bb.nph, label %bb26
-
-bb.nph:		; preds = %bb22
-	br label %bb23
-
-bb23:		; preds = %bb24, %bb.nph
-	%y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]		; <i32> [#uses=3]
-	%45 = mul i32 %y.21, %42		; <i32> [#uses=1]
-	%.sum1 = add i32 %45, %43		; <i32> [#uses=1]
-	%46 = getelementptr i8* %r, i32 %.sum1		; <i8*> [#uses=1]
-	%47 = mul i32 %y.21, %w		; <i32> [#uses=1]
-	%.sum5 = add i32 %47, %.sum3		; <i32> [#uses=1]
-	%48 = getelementptr i8* %j, i32 %.sum5		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
-	br label %bb24
-
-bb24:		; preds = %bb23
-	%indvar.next5 = add i32 %y.21, 1		; <i32> [#uses=2]
-	%exitcond6 = icmp ne i32 %indvar.next5, %x		; <i1> [#uses=1]
-	br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
-
-bb24.bb26_crit_edge:		; preds = %bb24
-	br label %bb26
-
-bb26:		; preds = %bb24.bb26_crit_edge, %bb22
-	%49 = mul i32 %x, %w		; <i32> [#uses=1]
-	%.sum4 = add i32 %.sum3, %49		; <i32> [#uses=1]
-	%50 = getelementptr i8* %j, i32 %.sum4		; <i8*> [#uses=1]
-	%51 = mul i32 %x, %w		; <i32> [#uses=1]
-	%52 = sdiv i32 %51, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
-	ret void
-
-bb29:		; preds = %bb20, %entry
-	%53 = add i32 %w, 15		; <i32> [#uses=1]
-	%54 = and i32 %53, -16		; <i32> [#uses=1]
-	%55 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %55, label %bb.nph11, label %bb33
-
-bb.nph11:		; preds = %bb29
-	br label %bb30
-
-bb30:		; preds = %bb31, %bb.nph11
-	%y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]		; <i32> [#uses=3]
-	%56 = mul i32 %y.310, %54		; <i32> [#uses=1]
-	%57 = getelementptr i8* %r, i32 %56		; <i8*> [#uses=1]
-	%58 = mul i32 %y.310, %w		; <i32> [#uses=1]
-	%59 = getelementptr i8* %j, i32 %58		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
-	br label %bb31
-
-bb31:		; preds = %bb30
-	%indvar.next13 = add i32 %y.310, 1		; <i32> [#uses=2]
-	%exitcond14 = icmp ne i32 %indvar.next13, %x		; <i1> [#uses=1]
-	br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
-
-bb31.bb33_crit_edge:		; preds = %bb31
-	br label %bb33
-
-bb33:		; preds = %bb31.bb33_crit_edge, %bb29
-	%60 = mul i32 %x, %w		; <i32> [#uses=1]
-	%61 = getelementptr i8* %j, i32 %60		; <i8*> [#uses=1]
-	%62 = mul i32 %x, %w		; <i32> [#uses=1]
-	%63 = sdiv i32 %62, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
-	ret void
-
-return:		; preds = %bb20
-	ret void
+  %0 = mul i32 %x, %w
+  %1 = mul i32 %x, %w
+  %2 = sdiv i32 %1, 4
+  %.sum2 = add i32 %2, %0
+  %cond = icmp eq i32 %d, 1
+  br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader:                                   ; preds = %entry
+  %3 = icmp sgt i32 %x, 0
+  br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7:                                          ; preds = %bb7.preheader
+  %4 = mul i32 %y.08, %w
+  %5 = mul i32 %y.08, %s
+  %6 = add i32 %5, 1
+  %tmp8 = icmp sgt i32 1, %w
+  %smax9 = select i1 %tmp8, i32 1, i32 %w
+  br label %bb6
+
+bb6:                                              ; preds = %bb7, %bb.nph7
+  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
+  %7 = add i32 %x.06, %4
+  %8 = shl i32 %x.06, 1
+  %9 = add i32 %6, %8
+  %10 = getelementptr i8* %r, i32 %9
+  %11 = load i8* %10, align 1
+  %12 = getelementptr i8* %j, i32 %7
+  store i8 %11, i8* %12, align 1
+  br label %bb7
+
+bb7:                                              ; preds = %bb6
+  %indvar.next7 = add i32 %x.06, 1
+  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
+  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge:                                ; preds = %bb7
+  br label %bb9
+
+bb9:                                              ; preds = %bb7.preheader, %bb7.bb9_crit_edge
+  br label %bb10
+
+bb10:                                             ; preds = %bb9
+  %indvar.next11 = add i32 %y.08, 1
+  %exitcond12 = icmp ne i32 %indvar.next11, %x
+  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge:                     ; preds = %bb10
+  br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split:               ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+  br label %bb18.loopexit
+
+bb.nph9:                                          ; preds = %bb10.preheader
+  %13 = icmp sgt i32 %w, 0
+  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split:                                    ; preds = %bb.nph9
+  br label %bb7.preheader
+
+bb7.preheader:                                    ; preds = %bb.nph9.split, %bb10
+  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
+  br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5:                                          ; preds = %bb18.loopexit
+  %14 = sdiv i32 %w, 2
+  %15 = icmp slt i32 %w, 2
+  %16 = sdiv i32 %x, 2
+  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split:                                    ; preds = %bb.nph5
+  %tmp2 = icmp sgt i32 1, %16
+  %smax3 = select i1 %tmp2, i32 1, i32 %16
+  br label %bb13
+
+bb13:                                             ; preds = %bb18, %bb.nph5.split
+  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
+  %17 = mul i32 %14, %y.14
+  %18 = shl i32 %y.14, 1
+  %19 = srem i32 %y.14, 2
+  %20 = add i32 %19, %18
+  %21 = mul i32 %20, %s
+  br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3:                                          ; preds = %bb13
+  %22 = add i32 %17, %0
+  %23 = add i32 %17, %.sum2
+  %24 = sdiv i32 %w, 2
+  %tmp = icmp sgt i32 1, %24
+  %smax = select i1 %tmp, i32 1, i32 %24
+  br label %bb14
+
+bb14:                                             ; preds = %bb15, %bb.nph3
+  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
+  %25 = shl i32 %x.12, 2
+  %26 = add i32 %25, %21
+  %27 = getelementptr i8* %r, i32 %26
+  %28 = load i8* %27, align 1
+  %.sum = add i32 %22, %x.12
+  %29 = getelementptr i8* %j, i32 %.sum
+  store i8 %28, i8* %29, align 1
+  %30 = shl i32 %x.12, 2
+  %31 = or i32 %30, 2
+  %32 = add i32 %31, %21
+  %33 = getelementptr i8* %r, i32 %32
+  %34 = load i8* %33, align 1
+  %.sum6 = add i32 %23, %x.12
+  %35 = getelementptr i8* %j, i32 %.sum6
+  store i8 %34, i8* %35, align 1
+  br label %bb15
+
+bb15:                                             ; preds = %bb14
+  %indvar.next = add i32 %x.12, 1
+  %exitcond = icmp ne i32 %indvar.next, %smax
+  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge:                              ; preds = %bb15
+  br label %bb17
+
+bb17:                                             ; preds = %bb15.bb17_crit_edge, %bb13
+  br label %bb18
+
+bb18.loopexit:                                    ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+  %36 = icmp slt i32 %x, 2
+  br i1 %36, label %bb20, label %bb.nph5
+
+bb18:                                             ; preds = %bb17
+  %indvar.next1 = add i32 %y.14, 1
+  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
+  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge:                              ; preds = %bb18
+  br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split:                        ; preds = %bb18.bb20_crit_edge, %bb.nph5
+  br label %bb20
+
+bb20:                                             ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+  switch i32 %d, label %return [
+    i32 3, label %bb22
+    i32 1, label %bb29
+  ]
+
+bb22:                                             ; preds = %bb20
+  %37 = mul i32 %x, %w
+  %38 = sdiv i32 %37, 4
+  %.sum3 = add i32 %38, %.sum2
+  %39 = add i32 %x, 15
+  %40 = and i32 %39, -16
+  %41 = add i32 %w, 15
+  %42 = and i32 %41, -16
+  %43 = mul i32 %40, %s
+  %44 = icmp sgt i32 %x, 0
+  br i1 %44, label %bb.nph, label %bb26
+
+bb.nph:                                           ; preds = %bb22
+  br label %bb23
+
+bb23:                                             ; preds = %bb24, %bb.nph
+  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
+  %45 = mul i32 %y.21, %42
+  %.sum1 = add i32 %45, %43
+  %46 = getelementptr i8* %r, i32 %.sum1
+  %47 = mul i32 %y.21, %w
+  %.sum5 = add i32 %47, %.sum3
+  %48 = getelementptr i8* %j, i32 %.sum5
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false)
+  br label %bb24
+
+bb24:                                             ; preds = %bb23
+  %indvar.next5 = add i32 %y.21, 1
+  %exitcond6 = icmp ne i32 %indvar.next5, %x
+  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge:                              ; preds = %bb24
+  br label %bb26
+
+bb26:                                             ; preds = %bb24.bb26_crit_edge, %bb22
+  %49 = mul i32 %x, %w
+  %.sum4 = add i32 %.sum3, %49
+  %50 = getelementptr i8* %j, i32 %.sum4
+  %51 = mul i32 %x, %w
+  %52 = sdiv i32 %51, 2
+  tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false)
+  ret void
+
+bb29:                                             ; preds = %bb20, %entry
+  %53 = add i32 %w, 15
+  %54 = and i32 %53, -16
+  %55 = icmp sgt i32 %x, 0
+  br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11:                                         ; preds = %bb29
+  br label %bb30
+
+bb30:                                             ; preds = %bb31, %bb.nph11
+  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
+  %56 = mul i32 %y.310, %54
+  %57 = getelementptr i8* %r, i32 %56
+  %58 = mul i32 %y.310, %w
+  %59 = getelementptr i8* %j, i32 %58
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false)
+  br label %bb31
+
+bb31:                                             ; preds = %bb30
+  %indvar.next13 = add i32 %y.310, 1
+  %exitcond14 = icmp ne i32 %indvar.next13, %x
+  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge:                              ; preds = %bb31
+  br label %bb33
+
+bb33:                                             ; preds = %bb31.bb33_crit_edge, %bb29
+  %60 = mul i32 %x, %w
+  %61 = getelementptr i8* %j, i32 %60
+  %62 = mul i32 %x, %w
+  %63 = sdiv i32 %62, 2
+  tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false)
+  ret void
+
+return:                                           ; preds = %bb20
+  ret void
 }
 
 define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 entry:
-	%0 = mul i32 %x, %w		; <i32> [#uses=2]
-	%1 = mul i32 %x, %w		; <i32> [#uses=1]
-	%2 = udiv i32 %1, 4		; <i32> [#uses=1]
-	%.sum2 = add i32 %2, %0		; <i32> [#uses=2]
-	%cond = icmp eq i32 %d, 1		; <i1> [#uses=1]
-	br i1 %cond, label %bb29, label %bb10.preheader
-
-bb10.preheader:		; preds = %entry
-	%3 = icmp ne i32 %x, 0		; <i1> [#uses=1]
-	br i1 %3, label %bb.nph9, label %bb18.loopexit
-
-bb.nph7:		; preds = %bb7.preheader
-	%4 = mul i32 %y.08, %w		; <i32> [#uses=1]
-	%5 = mul i32 %y.08, %s		; <i32> [#uses=1]
-	%6 = add i32 %5, 1		; <i32> [#uses=1]
-	%tmp8 = icmp ugt i32 1, %w		; <i1> [#uses=1]
-	%smax9 = select i1 %tmp8, i32 1, i32 %w		; <i32> [#uses=1]
-	br label %bb6
-
-bb6:		; preds = %bb7, %bb.nph7
-	%x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]		; <i32> [#uses=3]
-	%7 = add i32 %x.06, %4		; <i32> [#uses=1]
-	%8 = shl i32 %x.06, 1		; <i32> [#uses=1]
-	%9 = add i32 %6, %8		; <i32> [#uses=1]
-	%10 = getelementptr i8* %r, i32 %9		; <i8*> [#uses=1]
-	%11 = load i8* %10, align 1		; <i8> [#uses=1]
-	%12 = getelementptr i8* %j, i32 %7		; <i8*> [#uses=1]
-	store i8 %11, i8* %12, align 1
-	br label %bb7
-
-bb7:		; preds = %bb6
-	%indvar.next7 = add i32 %x.06, 1		; <i32> [#uses=2]
-	%exitcond10 = icmp ne i32 %indvar.next7, %smax9		; <i1> [#uses=1]
-	br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
-
-bb7.bb9_crit_edge:		; preds = %bb7
-	br label %bb9
-
-bb9:		; preds = %bb7.preheader, %bb7.bb9_crit_edge
-	br label %bb10
-
-bb10:		; preds = %bb9
-	%indvar.next11 = add i32 %y.08, 1		; <i32> [#uses=2]
-	%exitcond12 = icmp ne i32 %indvar.next11, %x		; <i1> [#uses=1]
-	br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
-
-bb10.bb18.loopexit_crit_edge:		; preds = %bb10
-	br label %bb10.bb18.loopexit_crit_edge.split
-
-bb10.bb18.loopexit_crit_edge.split:		; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
-	br label %bb18.loopexit
-
-bb.nph9:		; preds = %bb10.preheader
-	%13 = icmp ugt i32 %w, 0		; <i1> [#uses=1]
-	br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
-
-bb.nph9.split:		; preds = %bb.nph9
-	br label %bb7.preheader
-
-bb7.preheader:		; preds = %bb.nph9.split, %bb10
-	%y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]		; <i32> [#uses=3]
-	br i1 true, label %bb.nph7, label %bb9
-
-bb.nph5:		; preds = %bb18.loopexit
-	%14 = udiv i32 %w, 2		; <i32> [#uses=1]
-	%15 = icmp ult i32 %w, 2		; <i1> [#uses=1]
-	%16 = udiv i32 %x, 2		; <i32> [#uses=2]
-	br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
-
-bb.nph5.split:		; preds = %bb.nph5
-	%tmp2 = icmp ugt i32 1, %16		; <i1> [#uses=1]
-	%smax3 = select i1 %tmp2, i32 1, i32 %16		; <i32> [#uses=1]
-	br label %bb13
-
-bb13:		; preds = %bb18, %bb.nph5.split
-	%y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]		; <i32> [#uses=4]
-	%17 = mul i32 %14, %y.14		; <i32> [#uses=2]
-	%18 = shl i32 %y.14, 1		; <i32> [#uses=1]
-	%19 = urem i32 %y.14, 2		; <i32> [#uses=1]
-	%20 = add i32 %19, %18		; <i32> [#uses=1]
-	%21 = mul i32 %20, %s		; <i32> [#uses=2]
-	br i1 true, label %bb.nph3, label %bb17
-
-bb.nph3:		; preds = %bb13
-	%22 = add i32 %17, %0		; <i32> [#uses=1]
-	%23 = add i32 %17, %.sum2		; <i32> [#uses=1]
-	%24 = udiv i32 %w, 2		; <i32> [#uses=2]
-	%tmp = icmp ugt i32 1, %24		; <i1> [#uses=1]
-	%smax = select i1 %tmp, i32 1, i32 %24		; <i32> [#uses=1]
-	br label %bb14
-
-bb14:		; preds = %bb15, %bb.nph3
-	%x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]		; <i32> [#uses=5]
-	%25 = shl i32 %x.12, 2		; <i32> [#uses=1]
-	%26 = add i32 %25, %21		; <i32> [#uses=1]
-	%27 = getelementptr i8* %r, i32 %26		; <i8*> [#uses=1]
-	%28 = load i8* %27, align 1		; <i8> [#uses=1]
-	%.sum = add i32 %22, %x.12		; <i32> [#uses=1]
-	%29 = getelementptr i8* %j, i32 %.sum		; <i8*> [#uses=1]
-	store i8 %28, i8* %29, align 1
-	%30 = shl i32 %x.12, 2		; <i32> [#uses=1]
-	%31 = or i32 %30, 2		; <i32> [#uses=1]
-	%32 = add i32 %31, %21		; <i32> [#uses=1]
-	%33 = getelementptr i8* %r, i32 %32		; <i8*> [#uses=1]
-	%34 = load i8* %33, align 1		; <i8> [#uses=1]
-	%.sum6 = add i32 %23, %x.12		; <i32> [#uses=1]
-	%35 = getelementptr i8* %j, i32 %.sum6		; <i8*> [#uses=1]
-	store i8 %34, i8* %35, align 1
-	br label %bb15
-
-bb15:		; preds = %bb14
-	%indvar.next = add i32 %x.12, 1		; <i32> [#uses=2]
-	%exitcond = icmp ne i32 %indvar.next, %smax		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
-
-bb15.bb17_crit_edge:		; preds = %bb15
-	br label %bb17
-
-bb17:		; preds = %bb15.bb17_crit_edge, %bb13
-	br label %bb18
-
-bb18.loopexit:		; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
-	%36 = icmp ult i32 %x, 2		; <i1> [#uses=1]
-	br i1 %36, label %bb20, label %bb.nph5
-
-bb18:		; preds = %bb17
-	%indvar.next1 = add i32 %y.14, 1		; <i32> [#uses=2]
-	%exitcond4 = icmp ne i32 %indvar.next1, %smax3		; <i1> [#uses=1]
-	br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
-
-bb18.bb20_crit_edge:		; preds = %bb18
-	br label %bb18.bb20_crit_edge.split
-
-bb18.bb20_crit_edge.split:		; preds = %bb18.bb20_crit_edge, %bb.nph5
-	br label %bb20
-
-bb20:		; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
-	switch i32 %d, label %return [
-		i32 3, label %bb22
-		i32 1, label %bb29
-	]
-
-bb22:		; preds = %bb20
-	%37 = mul i32 %x, %w		; <i32> [#uses=1]
-	%38 = udiv i32 %37, 4		; <i32> [#uses=1]
-	%.sum3 = add i32 %38, %.sum2		; <i32> [#uses=2]
-	%39 = add i32 %x, 15		; <i32> [#uses=1]
-	%40 = and i32 %39, -16		; <i32> [#uses=1]
-	%41 = add i32 %w, 15		; <i32> [#uses=1]
-	%42 = and i32 %41, -16		; <i32> [#uses=1]
-	%43 = mul i32 %40, %s		; <i32> [#uses=1]
-	%44 = icmp ugt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %44, label %bb.nph, label %bb26
-
-bb.nph:		; preds = %bb22
-	br label %bb23
-
-bb23:		; preds = %bb24, %bb.nph
-	%y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]		; <i32> [#uses=3]
-	%45 = mul i32 %y.21, %42		; <i32> [#uses=1]
-	%.sum1 = add i32 %45, %43		; <i32> [#uses=1]
-	%46 = getelementptr i8* %r, i32 %.sum1		; <i8*> [#uses=1]
-	%47 = mul i32 %y.21, %w		; <i32> [#uses=1]
-	%.sum5 = add i32 %47, %.sum3		; <i32> [#uses=1]
-	%48 = getelementptr i8* %j, i32 %.sum5		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
-	br label %bb24
-
-bb24:		; preds = %bb23
-	%indvar.next5 = add i32 %y.21, 1		; <i32> [#uses=2]
-	%exitcond6 = icmp ne i32 %indvar.next5, %x		; <i1> [#uses=1]
-	br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
-
-bb24.bb26_crit_edge:		; preds = %bb24
-	br label %bb26
-
-bb26:		; preds = %bb24.bb26_crit_edge, %bb22
-	%49 = mul i32 %x, %w		; <i32> [#uses=1]
-	%.sum4 = add i32 %.sum3, %49		; <i32> [#uses=1]
-	%50 = getelementptr i8* %j, i32 %.sum4		; <i8*> [#uses=1]
-	%51 = mul i32 %x, %w		; <i32> [#uses=1]
-	%52 = udiv i32 %51, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
-	ret void
-
-bb29:		; preds = %bb20, %entry
-	%53 = add i32 %w, 15		; <i32> [#uses=1]
-	%54 = and i32 %53, -16		; <i32> [#uses=1]
-	%55 = icmp ugt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %55, label %bb.nph11, label %bb33
-
-bb.nph11:		; preds = %bb29
-	br label %bb30
-
-bb30:		; preds = %bb31, %bb.nph11
-	%y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]		; <i32> [#uses=3]
-	%56 = mul i32 %y.310, %54		; <i32> [#uses=1]
-	%57 = getelementptr i8* %r, i32 %56		; <i8*> [#uses=1]
-	%58 = mul i32 %y.310, %w		; <i32> [#uses=1]
-	%59 = getelementptr i8* %j, i32 %58		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
-	br label %bb31
-
-bb31:		; preds = %bb30
-	%indvar.next13 = add i32 %y.310, 1		; <i32> [#uses=2]
-	%exitcond14 = icmp ne i32 %indvar.next13, %x		; <i1> [#uses=1]
-	br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
-
-bb31.bb33_crit_edge:		; preds = %bb31
-	br label %bb33
-
-bb33:		; preds = %bb31.bb33_crit_edge, %bb29
-	%60 = mul i32 %x, %w		; <i32> [#uses=1]
-	%61 = getelementptr i8* %j, i32 %60		; <i8*> [#uses=1]
-	%62 = mul i32 %x, %w		; <i32> [#uses=1]
-	%63 = udiv i32 %62, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
-	ret void
-
-return:		; preds = %bb20
-	ret void
+  %0 = mul i32 %x, %w
+  %1 = mul i32 %x, %w
+  %2 = udiv i32 %1, 4
+  %.sum2 = add i32 %2, %0
+  %cond = icmp eq i32 %d, 1
+  br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader:                                   ; preds = %entry
+  %3 = icmp ne i32 %x, 0
+  br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7:                                          ; preds = %bb7.preheader
+  %4 = mul i32 %y.08, %w
+  %5 = mul i32 %y.08, %s
+  %6 = add i32 %5, 1
+  %tmp8 = icmp ugt i32 1, %w
+  %smax9 = select i1 %tmp8, i32 1, i32 %w
+  br label %bb6
+
+bb6:                                              ; preds = %bb7, %bb.nph7
+  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
+  %7 = add i32 %x.06, %4
+  %8 = shl i32 %x.06, 1
+  %9 = add i32 %6, %8
+  %10 = getelementptr i8* %r, i32 %9
+  %11 = load i8* %10, align 1
+  %12 = getelementptr i8* %j, i32 %7
+  store i8 %11, i8* %12, align 1
+  br label %bb7
+
+bb7:                                              ; preds = %bb6
+  %indvar.next7 = add i32 %x.06, 1
+  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
+  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge:                                ; preds = %bb7
+  br label %bb9
+
+bb9:                                              ; preds = %bb7.preheader, %bb7.bb9_crit_edge
+  br label %bb10
+
+bb10:                                             ; preds = %bb9
+  %indvar.next11 = add i32 %y.08, 1
+  %exitcond12 = icmp ne i32 %indvar.next11, %x
+  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge:                     ; preds = %bb10
+  br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split:               ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+  br label %bb18.loopexit
+
+bb.nph9:                                          ; preds = %bb10.preheader
+  %13 = icmp ugt i32 %w, 0
+  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split:                                    ; preds = %bb.nph9
+  br label %bb7.preheader
+
+bb7.preheader:                                    ; preds = %bb.nph9.split, %bb10
+  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
+  br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5:                                          ; preds = %bb18.loopexit
+  %14 = udiv i32 %w, 2
+  %15 = icmp ult i32 %w, 2
+  %16 = udiv i32 %x, 2
+  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split:                                    ; preds = %bb.nph5
+  %tmp2 = icmp ugt i32 1, %16
+  %smax3 = select i1 %tmp2, i32 1, i32 %16
+  br label %bb13
+
+bb13:                                             ; preds = %bb18, %bb.nph5.split
+  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
+  %17 = mul i32 %14, %y.14
+  %18 = shl i32 %y.14, 1
+  %19 = urem i32 %y.14, 2
+  %20 = add i32 %19, %18
+  %21 = mul i32 %20, %s
+  br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3:                                          ; preds = %bb13
+  %22 = add i32 %17, %0
+  %23 = add i32 %17, %.sum2
+  %24 = udiv i32 %w, 2
+  %tmp = icmp ugt i32 1, %24
+  %smax = select i1 %tmp, i32 1, i32 %24
+  br label %bb14
+
+bb14:                                             ; preds = %bb15, %bb.nph3
+  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
+  %25 = shl i32 %x.12, 2
+  %26 = add i32 %25, %21
+  %27 = getelementptr i8* %r, i32 %26
+  %28 = load i8* %27, align 1
+  %.sum = add i32 %22, %x.12
+  %29 = getelementptr i8* %j, i32 %.sum
+  store i8 %28, i8* %29, align 1
+  %30 = shl i32 %x.12, 2
+  %31 = or i32 %30, 2
+  %32 = add i32 %31, %21
+  %33 = getelementptr i8* %r, i32 %32
+  %34 = load i8* %33, align 1
+  %.sum6 = add i32 %23, %x.12
+  %35 = getelementptr i8* %j, i32 %.sum6
+  store i8 %34, i8* %35, align 1
+  br label %bb15
+
+bb15:                                             ; preds = %bb14
+  %indvar.next = add i32 %x.12, 1
+  %exitcond = icmp ne i32 %indvar.next, %smax
+  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge:                              ; preds = %bb15
+  br label %bb17
+
+bb17:                                             ; preds = %bb15.bb17_crit_edge, %bb13
+  br label %bb18
+
+bb18.loopexit:                                    ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+  %36 = icmp ult i32 %x, 2
+  br i1 %36, label %bb20, label %bb.nph5
+
+bb18:                                             ; preds = %bb17
+  %indvar.next1 = add i32 %y.14, 1
+  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
+  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge:                              ; preds = %bb18
+  br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split:                        ; preds = %bb18.bb20_crit_edge, %bb.nph5
+  br label %bb20
+
+bb20:                                             ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+  switch i32 %d, label %return [
+    i32 3, label %bb22
+    i32 1, label %bb29
+  ]
+
+bb22:                                             ; preds = %bb20
+  %37 = mul i32 %x, %w
+  %38 = udiv i32 %37, 4
+  %.sum3 = add i32 %38, %.sum2
+  %39 = add i32 %x, 15
+  %40 = and i32 %39, -16
+  %41 = add i32 %w, 15
+  %42 = and i32 %41, -16
+  %43 = mul i32 %40, %s
+  %44 = icmp ugt i32 %x, 0
+  br i1 %44, label %bb.nph, label %bb26
+
+bb.nph:                                           ; preds = %bb22
+  br label %bb23
+
+bb23:                                             ; preds = %bb24, %bb.nph
+  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
+  %45 = mul i32 %y.21, %42
+  %.sum1 = add i32 %45, %43
+  %46 = getelementptr i8* %r, i32 %.sum1
+  %47 = mul i32 %y.21, %w
+  %.sum5 = add i32 %47, %.sum3
+  %48 = getelementptr i8* %j, i32 %.sum5
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false)
+  br label %bb24
+
+bb24:                                             ; preds = %bb23
+  %indvar.next5 = add i32 %y.21, 1
+  %exitcond6 = icmp ne i32 %indvar.next5, %x
+  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge:                              ; preds = %bb24
+  br label %bb26
+
+bb26:                                             ; preds = %bb24.bb26_crit_edge, %bb22
+  %49 = mul i32 %x, %w
+  %.sum4 = add i32 %.sum3, %49
+  %50 = getelementptr i8* %j, i32 %.sum4
+  %51 = mul i32 %x, %w
+  %52 = udiv i32 %51, 2
+  tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false)
+  ret void
+
+bb29:                                             ; preds = %bb20, %entry
+  %53 = add i32 %w, 15
+  %54 = and i32 %53, -16
+  %55 = icmp ugt i32 %x, 0
+  br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11:                                         ; preds = %bb29
+  br label %bb30
+
+bb30:                                             ; preds = %bb31, %bb.nph11
+  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
+  %56 = mul i32 %y.310, %54
+  %57 = getelementptr i8* %r, i32 %56
+  %58 = mul i32 %y.310, %w
+  %59 = getelementptr i8* %j, i32 %58
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false)
+  br label %bb31
+
+bb31:                                             ; preds = %bb30
+  %indvar.next13 = add i32 %y.310, 1
+  %exitcond14 = icmp ne i32 %indvar.next13, %x
+  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge:                              ; preds = %bb31
+  br label %bb33
+
+bb33:                                             ; preds = %bb31.bb33_crit_edge, %bb29
+  %60 = mul i32 %x, %w
+  %61 = getelementptr i8* %j, i32 %60
+  %62 = mul i32 %x, %w
+  %63 = udiv i32 %62, 2
+  tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false)
+  ret void
+
+return:                                           ; preds = %bb20
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
index e952a9b..d3d8e3f 100644
--- a/test/CodeGen/X86/personality.ll
+++ b/test/CodeGen/X86/personality.ll
@@ -4,40 +4,41 @@
 
 define void @_Z1fv() {
 entry:
-	invoke void @_Z1gv( )
-			to label %return unwind label %unwind
+  invoke void @_Z1gv()
+          to label %return unwind label %unwind
 
-unwind:		; preds = %entry
-	br i1 false, label %eh_then, label %cleanup20
+unwind:                                           ; preds = %entry
+  br i1 false, label %eh_then, label %cleanup20
 
-eh_then:		; preds = %unwind
-	invoke void @__cxa_end_catch( )
-			to label %return unwind label %unwind10
+eh_then:                                          ; preds = %unwind
+  invoke void @__cxa_end_catch()
+          to label %return unwind label %unwind10
 
-unwind10:		; preds = %eh_then
-	%eh_select13 = tail call i64 (i8*, i8*, ...)* @llvm.eh.selector.i64( i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1 )		; <i32> [#uses=2]
-	%tmp18 = icmp slt i64 %eh_select13, 0		; <i1> [#uses=1]
-	br i1 %tmp18, label %filter, label %cleanup20
+unwind10:                                         ; preds = %eh_then
+  %upgraded.eh_select13 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1)
+  %upgraded.eh_select131 = sext i32 %upgraded.eh_select13 to i64
+  %tmp18 = icmp slt i64 %upgraded.eh_select131, 0
+  br i1 %tmp18, label %filter, label %cleanup20
 
-filter:		; preds = %unwind10
-	unreachable
+filter:                                           ; preds = %unwind10
+  unreachable
 
-cleanup20:		; preds = %unwind10, %unwind
-	%eh_selector.0 = phi i64 [ 0, %unwind ], [ %eh_select13, %unwind10 ]		; <i32> [#uses=0]
-	ret void
+cleanup20:                                        ; preds = %unwind10, %unwind
+  %eh_selector.0 = phi i64 [ 0, %unwind ], [ %upgraded.eh_select131, %unwind10 ]
+  ret void
 
-return:		; preds = %eh_then, %entry
-	ret void
+return:                                           ; preds = %eh_then, %entry
+  ret void
 }
 
 declare void @_Z1gv()
 
-declare i64 @llvm.eh.selector.i64(i8*, i8*, ...)
-
 declare void @__gxx_personality_v0()
 
 declare void @__cxa_end_catch()
 
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
 ; X64:      zPLR
 ; X64:      .byte 155
 ; X64-NEXT: .long	___gxx_personality_v0@GOTPCREL+4
diff --git a/test/CodeGen/X86/pre-split2.ll b/test/CodeGen/X86/pre-split2.ll
deleted file mode 100644
index 670737b..0000000
--- a/test/CodeGen/X86/pre-split2.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats -regalloc=linearscan |& \
-; RUN:   grep {pre-alloc-split} | count 2
-
-define i32 @t(i32 %arg) {
-entry:
-	br label %bb6
-
-.noexc6:		; preds = %bb6
-	%0 = and i32 %2, -8		; <i32> [#uses=1]
-	tail call void @llvm.memmove.i32(i8* %3, i8* null, i32 %0, i32 1) nounwind
-	store double %1, double* null, align 8
-	br label %bb6
-
-bb6:		; preds = %.noexc6, %entry
-	%1 = uitofp i32 %arg to double		; <double> [#uses=1]
-	%2 = sub i32 0, 0		; <i32> [#uses=1]
-	%3 = invoke i8* @_Znwm(i32 0)
-			to label %.noexc6 unwind label %lpad32		; <i8*> [#uses=1]
-
-lpad32:		; preds = %bb6
-	unreachable
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
-
-declare i8* @_Znwm(i32)
diff --git a/test/CodeGen/X86/pre-split3.ll b/test/CodeGen/X86/pre-split3.ll
deleted file mode 100644
index 0c49a91..0000000
--- a/test/CodeGen/X86/pre-split3.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
-; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
-
-define i32 @t(i32 %arg) {
-entry:
-	br label %bb6
-
-.noexc6:		; preds = %bb6
-	%0 = and i32 %2, -8		; <i32> [#uses=1]
-	tail call void @llvm.memmove.i32(i8* %3, i8* null, i32 %0, i32 1) nounwind
-	store double %1, double* null, align 8
-	br label %bb6
-
-bb6:		; preds = %.noexc6, %entry
-	%1 = uitofp i32 %arg to double		; <double> [#uses=1]
-	%2 = sub i32 0, 0		; <i32> [#uses=1]
-	%3 = invoke i8* @_Znwm(i32 0)
-			to label %.noexc6 unwind label %lpad32		; <i8*> [#uses=1]
-
-lpad32:		; preds = %bb6
-	unreachable
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
-
-declare i8* @_Znwm(i32)
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
index f52f8c7..484afc9 100644
--- a/test/CodeGen/X86/private.ll
+++ b/test/CodeGen/X86/private.ll
@@ -5,8 +5,6 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lbaz:
 ; RUN: llc < %s -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
 
-declare void @foo()
-
 define private void @foo() {
         ret void
 }
diff --git a/test/CodeGen/X86/promote-trunc.ll b/test/CodeGen/X86/promote-trunc.ll
new file mode 100644
index 0000000..4211d82
--- /dev/null
+++ b/test/CodeGen/X86/promote-trunc.ll
@@ -0,0 +1,11 @@
+; RUN: llc -promote-elements < %s -march=x86-64
+
+define<4 x i8> @func_8_64() {
+  %F = load <4 x i64>* undef
+  %G = trunc <4 x i64> %F to <4 x i8>
+  %H = load <4 x i64>* undef
+  %Y = trunc <4 x i64> %H to <4 x i8>
+  %T = add <4 x i8> %Y, %G
+  ret <4 x i8> %T
+}
+
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 4a98efb..a9a5420 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -312,8 +312,6 @@ entry:
   ret void
 }
 
-declare void @foo()
-
 ; If caller / callee calling convention mismatch then check if the return
 ; values are returned in the same registers.
 ; rdar://7874780
diff --git a/test/CodeGen/X86/tail-dup-addr.ll b/test/CodeGen/X86/tail-dup-addr.ll
new file mode 100644
index 0000000..c5a105c
--- /dev/null
+++ b/test/CodeGen/X86/tail-dup-addr.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Test that we don't drop a block that has its address taken.
+
+; CHECK: Ltmp1:                                  ## Block address taken
+; CHECK: Ltmp2:                                  ## Block address taken
+
+@a = common global i32 0, align 4
+@p = common global i8* null, align 8
+
+define void @foo() noreturn nounwind uwtable ssp {
+entry:
+  %tmp = load i32* @a, align 4
+  %foo = icmp eq i32 0, %tmp
+  br i1 %foo, label %sw.bb, label %sw.default
+
+sw.bb:                                            ; preds = %entry
+  store i8* blockaddress(@foo, %sw.bb1), i8** @p, align 8
+  br label %sw.bb1
+
+sw.bb1:                                           ; preds = %sw.default, %sw.bb, %entry
+  store i8* blockaddress(@foo, %sw.default), i8** @p, align 8
+  br label %sw.default
+
+sw.default:                                       ; preds = %sw.bb1, %entry
+  store i8* blockaddress(@foo, %sw.bb1), i8** @p, align 8
+  br label %sw.bb1
+}
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index 9f70489..d8fffbe 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -10,9 +10,17 @@ entry:
   %String2Loc = alloca [31 x i8], align 1
   br label %bb
 
-bb:
+bb:                                               ; preds = %bb, %entry
   %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
-  call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1, i1 false)
+  br label %bb
+
+return:                                           ; No predecessors!
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
 ; I386: calll {{_?}}memcpy
 
 ; CORE2: movabsq
@@ -20,13 +28,6 @@ bb:
 ; CORE2: movabsq
 
 ; COREI7: movups _.str3
-  br label %bb
-
-return:
-  ret void
-}
-
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 
 ; CORE2: .section
 ; CORE2: .align  3
diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
index 4817db2..1e86d75 100644
--- a/test/CodeGen/X86/variable-sized-darwin-bzero.ll
+++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
 
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-
 define void @foo(i8* %p, i64 %n) {
-  call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4)
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i32 4, i1 false)
   ret void
 }
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index c595a6d..537d63b 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -6,8 +6,6 @@
 ; RUN: grep .Lbaz: %t
 ; RUN: grep ldw.*\.Lbaz %t
 
-declare void @foo()
-
 define private void @foo() {
         ret void
 }
diff --git a/test/DebugInfo/X86/earlydup-crash.ll b/test/DebugInfo/X86/earlydup-crash.ll
new file mode 100644
index 0000000..5bd0c7e
--- /dev/null
+++ b/test/DebugInfo/X86/earlydup-crash.ll
@@ -0,0 +1,85 @@
+; RUN: llc %s -mtriple=i386-apple-macosx10.6.7 -o /dev/null
+
+; This used to crash because early dup was not ignoring debug instructions.
+
+%struct.cpp_dir = type { %struct.cpp_dir*, i8*, i32, i8, i8**, i8*, i8* (i8*, %struct.cpp_dir*)*, i64, i32, i8 }
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+define internal i8* @framework_construct_pathname(i8* %fname, %struct.cpp_dir* %dir) nounwind ssp {
+entry:
+  br i1 undef, label %bb33, label %bb
+
+bb:                                               ; preds = %entry
+  %tmp = icmp eq i32 undef, 0
+  %tmp1 = add i32 0, 11
+  call void @llvm.dbg.value(metadata !{i32 %tmp1}, i64 0, metadata !0)
+  br i1 undef, label %bb18, label %bb31.preheader
+
+bb31.preheader:                                   ; preds = %bb19, %bb
+  %tmp2 = getelementptr inbounds i8* %fname, i32 0
+  br label %bb31
+
+bb18:                                             ; preds = %bb
+  %tmp3 = icmp eq i32 undef, 0
+  br i1 %tmp3, label %bb19, label %bb33
+
+bb19:                                             ; preds = %bb18
+  call void @foobar(i32 0)
+  br label %bb31.preheader
+
+bb22:                                             ; preds = %bb31
+  %tmp4 = add i32 0, %tmp1
+  call void @foobar(i32 %tmp4)
+  br i1 undef, label %bb33, label %bb31
+
+bb31:                                             ; preds = %bb22, %bb31.preheader
+  br i1 false, label %bb33, label %bb22
+
+bb33:                                             ; preds = %bb31, %bb22, %bb18, %entry
+  ret i8* undef
+}
+
+declare void @foobar(i32)
+
+!0 = metadata !{i32 590080, metadata !1, metadata !"frname_len", metadata !3, i32 517, metadata !38, i32 0} ; [ DW_TAG_auto_variable ]
+!1 = metadata !{i32 589835, metadata !2, i32 515, i32 0, metadata !3, i32 19} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 589870, i32 0, metadata !3, metadata !"framework_construct_pathname", metadata !"framework_construct_pathname", metadata !"", metadata !3, i32 515, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, %struct.cpp_dir*)* @framework_construct_pathname} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 589865, metadata !"darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config", metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 589841, i32 0, i32 1, metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config/darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{metadata !7, metadata !9, metadata !11}
+!7 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589860, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 589862, metadata !3, metadata !"", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 589846, metadata !13, metadata !"cpp_dir", metadata !13, i32 45, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 589865, metadata !"cpplib.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/../libcpp/include", metadata !4} ; [ DW_TAG_file_type ]
+!14 = metadata !{i32 589843, metadata !3, metadata !"cpp_dir", metadata !13, i32 43, i64 352, i64 32, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ]
+!15 = metadata !{metadata !16, metadata !18, metadata !19, metadata !21, metadata !23, metadata !25, metadata !27, metadata !29, metadata !33, metadata !36}
+!16 = metadata !{i32 589837, metadata !14, metadata !"next", metadata !13, i32 572, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_member ]
+!17 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 589837, metadata !14, metadata !"name", metadata !13, i32 575, i64 32, i64 32, i64 32, i32 0, metadata !7} ; [ DW_TAG_member ]
+!19 = metadata !{i32 589837, metadata !14, metadata !"len", metadata !13, i32 576, i64 32, i64 32, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 589860, metadata !3, metadata !"unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 589837, metadata !14, metadata !"sysp", metadata !13, i32 580, i64 8, i64 8, i64 96, i32 0, metadata !22} ; [ DW_TAG_member ]
+!22 = metadata !{i32 589860, metadata !3, metadata !"unsigned char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!23 = metadata !{i32 589837, metadata !14, metadata !"name_map", metadata !13, i32 584, i64 32, i64 32, i64 128, i32 0, metadata !24} ; [ DW_TAG_member ]
+!24 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{i32 589837, metadata !14, metadata !"header_map", metadata !13, i32 590, i64 32, i64 32, i64 160, i32 0, metadata !26} ; [ DW_TAG_member ]
+!26 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!27 = metadata !{i32 589837, metadata !14, metadata !"construct", metadata !13, i32 597, i64 32, i64 32, i64 192, i32 0, metadata !28} ; [ DW_TAG_member ]
+!28 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!29 = metadata !{i32 589837, metadata !14, metadata !"ino", metadata !13, i32 601, i64 64, i64 64, i64 224, i32 0, metadata !30} ; [ DW_TAG_member ]
+!30 = metadata !{i32 589846, metadata !31, metadata !"ino_t", metadata !31, i32 141, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ]
+!31 = metadata !{i32 589865, metadata !"types.h", metadata !"/usr/include/sys", metadata !4} ; [ DW_TAG_file_type ]
+!32 = metadata !{i32 589860, metadata !3, metadata !"long long unsigned int", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!33 = metadata !{i32 589837, metadata !14, metadata !"dev", metadata !13, i32 602, i64 32, i64 32, i64 288, i32 0, metadata !34} ; [ DW_TAG_member ]
+!34 = metadata !{i32 589846, metadata !31, metadata !"dev_t", metadata !31, i32 107, i64 0, i64 0, i64 0, i32 0, metadata !35} ; [ DW_TAG_typedef ]
+!35 = metadata !{i32 589860, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!36 = metadata !{i32 589837, metadata !14, metadata !"user_supplied_p", metadata !13, i32 605, i64 8, i64 8, i64 320, i32 0, metadata !37} ; [ DW_TAG_member ]
+!37 = metadata !{i32 589860, metadata !3, metadata !"_Bool", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!38 = metadata !{i32 589846, metadata !39, metadata !"size_t", metadata !39, i32 326, i64 0, i64 0, i64 0, i32 0, metadata !40} ; [ DW_TAG_typedef ]
+!39 = metadata !{i32 589865, metadata !"stddef.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/./prev-gcc/include", metadata !4} ; [ DW_TAG_file_type ]
+!40 = metadata !{i32 589860, metadata !3, metadata !"long unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
diff --git a/test/Feature/calltest.ll b/test/Feature/calltest.ll
index feafd3c..dcdb1a0 100644
--- a/test/Feature/calltest.ll
+++ b/test/Feature/calltest.ll
@@ -4,8 +4,6 @@
 
 %FunTy = type i32 (i32)
 
-declare i32 @test(i32)   ; Test forward declaration merging
-
 define void @invoke(%FunTy* %x) {
         %foo = call i32 %x( i32 123 )           ; <i32> [#uses=0]
         %foo2 = tail call i32 %x( i32 123 )             ; <i32> [#uses=0]
diff --git a/test/Feature/forwardreftest.ll b/test/Feature/forwardreftest.ll
index 26d214a..6ca1be7 100644
--- a/test/Feature/forwardreftest.ll
+++ b/test/Feature/forwardreftest.ll
@@ -4,9 +4,9 @@
 
 %myty = type i32 
 %myfn = type float (i32,double,i32,i16)
-type i32(%myfn*)
-type i32(i32)
-type i32(i32(i32)*)
+%0 = type i32(%myfn*)
+%1 = type i32(i32)
+%2 = type i32(i32(i32)*)
 
   %thisfuncty = type i32 (i32) *
 
diff --git a/test/Feature/globalredefinition.ll b/test/Feature/globalredefinition.ll
deleted file mode 100644
index 42e2d1a..0000000
--- a/test/Feature/globalredefinition.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; Test forward references and redefinitions of globals
-
-@A = global i32* @B             ; <i32**> [#uses=0]
-@B = global i32 7               ; <i32*> [#uses=1]
-
-declare void @X()
-
-declare void @X()
-
-define void @X() {
-  ret void
-}
-
-declare void @X()
diff --git a/test/Feature/opaquetypes.ll b/test/Feature/opaquetypes.ll
index 6539c1a..37fdf93 100644
--- a/test/Feature/opaquetypes.ll
+++ b/test/Feature/opaquetypes.ll
@@ -16,13 +16,10 @@
 %AAA = type { \2*, {\2*}, [12x{\2*}], {[1x{\2*}]} }
 
 ; Test numbered types
-type %CCC
-type %BBB
+%0 = type %CCC
+%1 = type %BBB
 %Composite = type { %0, %1 }
 
-; Test simple opaque type resolution...
-%intty = type i32
-
 ; Perform a simple forward reference...
 %ty1 = type { %ty2, i32 }
 %ty2 = type float
diff --git a/test/Feature/paramattrs.ll b/test/Feature/paramattrs.ll
index 91aa460..9860f5a 100644
--- a/test/Feature/paramattrs.ll
+++ b/test/Feature/paramattrs.ll
@@ -2,8 +2,8 @@
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
 
-%ZFunTy = type i32(i8 zeroext)
-%SFunTy = type i32(i8 signext)
+%ZFunTy = type i32(i8)
+%SFunTy = type i32(i8)
 
 declare signext i16 @"test"(i16 signext %arg)  
 declare zeroext i8 @"test2" (i16 zeroext %a2) 
@@ -14,7 +14,7 @@ declare void @exit(i32) noreturn nounwind
 
 define i32 @main(i32 inreg %argc, i8 ** inreg %argv) nounwind {
     %val = trunc i32 %argc to i16
-    %res1 = call signext i16 (i16 signext) *@test(i16 signext %val) 
+    %res1 = call signext i16 (i16 ) *@test(i16 signext %val) 
     %two = add i16 %res1, %res1
     %res2 = call zeroext i8 @test2(i16 zeroext %two )  
     %retVal = sext i16 %two to i32
diff --git a/test/Feature/testtype.ll b/test/Feature/testtype.ll
index 124aa09..c5af723 100644
--- a/test/Feature/testtype.ll
+++ b/test/Feature/testtype.ll
@@ -11,8 +11,8 @@
 %fwd    = type %fwdref*
 
 ; same as above with unnamed types
-type { %1* }
-type %0* 
+%0 = type { %1* }
+%1 = type %0* 
 %test = type %1
 
 %test2 = type [2 x i32]
diff --git a/test/Feature/weak_constant.ll b/test/Feature/weak_constant.ll
index 9025aaa..fba7f12 100644
--- a/test/Feature/weak_constant.ll
+++ b/test/Feature/weak_constant.ll
@@ -4,7 +4,7 @@
 ; RUN:   grep 7 %t | count 1
 ; RUN:   grep 9 %t | count 1
 
-	type { i32, i32 }		; type %0
+	%0 = type { i32, i32 }		; type %0
 @a = weak constant i32 undef		; <i32*> [#uses=1]
 @b = weak constant i32 5		; <i32*> [#uses=1]
 @c = weak constant %0 { i32 7, i32 9 }		; <%0*> [#uses=1]
diff --git a/test/FrontendC/2008-07-29-EHLabel.ll b/test/FrontendC/2008-07-29-EHLabel.ll
deleted file mode 100644
index 186eafa..0000000
--- a/test/FrontendC/2008-07-29-EHLabel.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; RUN: llc -disable-cfi %s -o - | %llvmgcc -xassembler -c -o /dev/null -
-; PR2609
-	%struct..0._11 = type { i32 }
-	%struct..1__pthread_mutex_s = type { i32, i32, i32, i32, i32, %struct..0._11 }
-	%struct.pthread_attr_t = type { i32, [32 x i8] }
-	%struct.pthread_mutex_t = type { %struct..1__pthread_mutex_s }
-	%"struct.std::__ctype_abstract_base<wchar_t>" = type { %"struct.std::locale::facet" }
-	%"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__ctype_abstract_base<wchar_t>"*, %"struct.std::__ctype_abstract_base<wchar_t>"* }
-	%"struct.std::basic_istream<char,std::char_traits<char> >" = type { i32 (...)**, i32, %"struct.std::basic_ios<char,std::char_traits<char> >" }
-	%"struct.std::basic_istream<char,std::char_traits<char> >::sentry" = type { i8 }
-	%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
-	%"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
-	%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }
-	%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" }
-	%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
-	%"struct.std::ios_base::_Words" = type { i8*, i32 }
-	%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
-	%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
-	%"struct.std::locale::facet" = type { i32 (...)**, i32 }
-
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %struct..0._11*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %struct..0._11*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%struct..0._11*)* @pthread_mutexattr_init		; <i32 (%struct..0._11*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%struct..0._11*, i32)* @pthread_mutexattr_settype		; <i32 (%struct..0._11*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%struct..0._11*)* @pthread_mutexattr_destroy		; <i32 (%struct..0._11*)*> [#uses=0]
-
-define %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSi7getlineEPcic(%"struct.std::basic_istream<char,std::char_traits<char> >"* %this, i8* %__s, i32 %__n, i8 signext  %__delim) {
-entry:
-	%__cerb = alloca %"struct.std::basic_istream<char,std::char_traits<char> >::sentry"		; <%"struct.std::basic_istream<char,std::char_traits<char> >::sentry"*> [#uses=2]
-	getelementptr %"struct.std::basic_istream<char,std::char_traits<char> >"* %this, i32 0, i32 1		; <i32*>:0 [#uses=7]
-	store i32 0, i32* %0, align 4
-	call void @_ZNSi6sentryC1ERSib( %"struct.std::basic_istream<char,std::char_traits<char> >::sentry"* %__cerb, %"struct.std::basic_istream<char,std::char_traits<char> >"* %this, i8 zeroext  1 )
-	getelementptr %"struct.std::basic_istream<char,std::char_traits<char> >::sentry"* %__cerb, i32 0, i32 0		; <i8*>:1 [#uses=1]
-	load i8* %1, align 8		; <i8>:2 [#uses=1]
-	%toBool = icmp eq i8 %2, 0		; <i1> [#uses=1]
-	br i1 %toBool, label %bb162, label %bb
-
-bb:		; preds = %entry
-	zext i8 %__delim to i32		; <i32>:3 [#uses=1]
-	getelementptr %"struct.std::basic_istream<char,std::char_traits<char> >"* %this, i32 0, i32 0		; <i32 (...)***>:4 [#uses=1]
-	load i32 (...)*** %4, align 4		; <i32 (...)**>:5 [#uses=1]
-	getelementptr i32 (...)** %5, i32 -3		; <i32 (...)**>:6 [#uses=1]
-	bitcast i32 (...)** %6 to i32*		; <i32*>:7 [#uses=1]
-	load i32* %7, align 4		; <i32>:8 [#uses=1]
-	bitcast %"struct.std::basic_istream<char,std::char_traits<char> >"* %this to i8*		; <i8*>:9 [#uses=1]
-	%ctg2186 = getelementptr i8* %9, i32 %8		; <i8*> [#uses=1]
-	bitcast i8* %ctg2186 to %"struct.std::basic_ios<char,std::char_traits<char> >"*		; <%"struct.std::basic_ios<char,std::char_traits<char> >"*>:10 [#uses=1]
-	getelementptr %"struct.std::basic_ios<char,std::char_traits<char> >"* %10, i32 0, i32 4		; <%"struct.std::basic_streambuf<char,std::char_traits<char> >"**>:11 [#uses=1]
-	load %"struct.std::basic_streambuf<char,std::char_traits<char> >"** %11, align 4		; <%"struct.std::basic_streambuf<char,std::char_traits<char> >"*>:12 [#uses=9]
-	getelementptr %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12, i32 0, i32 2		; <i8**>:13 [#uses=10]
-	load i8** %13, align 4		; <i8*>:14 [#uses=2]
-	getelementptr %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12, i32 0, i32 3		; <i8**>:15 [#uses=6]
-	load i8** %15, align 4		; <i8*>:16 [#uses=1]
-	icmp ult i8* %14, %16		; <i1>:17 [#uses=1]
-	br i1 %17, label %bb81, label %bb82
-
-bb81:		; preds = %bb
-	load i8* %14, align 1		; <i8>:18 [#uses=1]
-	zext i8 %18 to i32		; <i32>:19 [#uses=1]
-	%.pre = getelementptr %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12, i32 0, i32 0		; <i32 (...)***> [#uses=1]
-	br label %bb119.preheader
-
-bb82:		; preds = %bb
-	getelementptr %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12, i32 0, i32 0		; <i32 (...)***>:20 [#uses=2]
-	load i32 (...)*** %20, align 4		; <i32 (...)**>:21 [#uses=1]
-	getelementptr i32 (...)** %21, i32 9		; <i32 (...)**>:22 [#uses=1]
-	load i32 (...)** %22, align 4		; <i32 (...)*>:23 [#uses=1]
-	bitcast i32 (...)* %23 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*		; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:24 [#uses=1]
-	invoke i32 %24( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
-			to label %bb119.preheader unwind label %lpad		; <i32>:25 [#uses=1]
-
-bb119.preheader:		; preds = %bb82, %bb81
-	%.pre-phi = phi i32 (...)*** [ %.pre, %bb81 ], [ %20, %bb82 ]		; <i32 (...)***> [#uses=4]
-	%__c79.0.ph = phi i32 [ %19, %bb81 ], [ %25, %bb82 ]		; <i32> [#uses=1]
-	sext i8 %__delim to i32		; <i32>:26 [#uses=1]
-	br label %bb119
-
-bb84:		; preds = %bb119
-	sub i32 %__n, %82		; <i32>:27 [#uses=1]
-	add i32 %27, -1		; <i32>:28 [#uses=2]
-	load i8** %15, align 4		; <i8*>:29 [#uses=1]
-	ptrtoint i8* %29 to i32		; <i32>:30 [#uses=1]
-	load i8** %13, align 4		; <i8*>:31 [#uses=3]
-	ptrtoint i8* %31 to i32		; <i32>:32 [#uses=2]
-	sub i32 %30, %32		; <i32>:33 [#uses=2]
-	icmp slt i32 %28, %33		; <i1>:34 [#uses=1]
-	select i1 %34, i32 %28, i32 %33		; <i32>:35 [#uses=3]
-	icmp sgt i32 %35, 1		; <i1>:36 [#uses=1]
-	br i1 %36, label %bb90, label %bb99
-
-bb90:		; preds = %bb84
-	call i8* @memchr( i8* %31, i32 %26, i32 %35 ) nounwind readonly 		; <i8*>:37 [#uses=2]
-	icmp eq i8* %37, null		; <i1>:38 [#uses=1]
-	br i1 %38, label %bb93, label %bb92
-
-bb92:		; preds = %bb90
-	ptrtoint i8* %37 to i32		; <i32>:39 [#uses=1]
-	sub i32 %39, %32		; <i32>:40 [#uses=1]
-	br label %bb93
-
-bb93:		; preds = %bb92, %bb90
-	%__size.0 = phi i32 [ %40, %bb92 ], [ %35, %bb90 ]		; <i32> [#uses=4]
-	call void @llvm.memcpy.i32( i8* %__s_addr.0, i8* %31, i32 %__size.0, i32 1 )
-	getelementptr i8* %__s_addr.0, i32 %__size.0		; <i8*>:41 [#uses=3]
-	load i8** %13, align 4		; <i8*>:42 [#uses=1]
-	getelementptr i8* %42, i32 %__size.0		; <i8*>:43 [#uses=1]
-	store i8* %43, i8** %13, align 4
-	load i32* %0, align 4		; <i32>:44 [#uses=1]
-	add i32 %44, %__size.0		; <i32>:45 [#uses=1]
-	store i32 %45, i32* %0, align 4
-	load i8** %13, align 4		; <i8*>:46 [#uses=2]
-	load i8** %15, align 4		; <i8*>:47 [#uses=1]
-	icmp ult i8* %46, %47		; <i1>:48 [#uses=1]
-	br i1 %48, label %bb95, label %bb96
-
-bb95:		; preds = %bb93
-	load i8* %46, align 1		; <i8>:49 [#uses=1]
-	zext i8 %49 to i32		; <i32>:50 [#uses=1]
-	br label %bb119
-
-bb96:		; preds = %bb93
-	load i32 (...)*** %.pre-phi, align 4		; <i32 (...)**>:51 [#uses=1]
-	getelementptr i32 (...)** %51, i32 9		; <i32 (...)**>:52 [#uses=1]
-	load i32 (...)** %52, align 4		; <i32 (...)*>:53 [#uses=1]
-	bitcast i32 (...)* %53 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*		; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:54 [#uses=1]
-	invoke i32 %54( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
-			to label %bb119 unwind label %lpad		; <i32>:55 [#uses=1]
-
-bb99:		; preds = %bb84
-	trunc i32 %__c79.0 to i8		; <i8>:56 [#uses=1]
-	store i8 %56, i8* %__s_addr.0, align 1
-	getelementptr i8* %__s_addr.0, i32 1		; <i8*>:57 [#uses=5]
-	load i32* %0, align 4		; <i32>:58 [#uses=1]
-	add i32 %58, 1		; <i32>:59 [#uses=1]
-	store i32 %59, i32* %0, align 4
-	load i8** %13, align 4		; <i8*>:60 [#uses=3]
-	load i8** %15, align 4		; <i8*>:61 [#uses=1]
-	icmp ult i8* %60, %61		; <i1>:62 [#uses=1]
-	br i1 %62, label %bb101, label %bb102
-
-bb101:		; preds = %bb99
-	load i8* %60, align 1		; <i8>:63 [#uses=1]
-	zext i8 %63 to i32		; <i32>:64 [#uses=1]
-	getelementptr i8* %60, i32 1		; <i8*>:65 [#uses=1]
-	store i8* %65, i8** %13, align 4
-	br label %bb104
-
-bb102:		; preds = %bb99
-	load i32 (...)*** %.pre-phi, align 4		; <i32 (...)**>:66 [#uses=1]
-	getelementptr i32 (...)** %66, i32 10		; <i32 (...)**>:67 [#uses=1]
-	load i32 (...)** %67, align 4		; <i32 (...)*>:68 [#uses=1]
-	bitcast i32 (...)* %68 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*		; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:69 [#uses=1]
-	invoke i32 %69( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
-			to label %bb104 unwind label %lpad		; <i32>:70 [#uses=1]
-
-bb104:		; preds = %bb102, %bb101
-	%__ret44.0 = phi i32 [ %64, %bb101 ], [ %70, %bb102 ]		; <i32> [#uses=1]
-	icmp eq i32 %__ret44.0, -1		; <i1>:71 [#uses=1]
-	br i1 %71, label %bb119, label %bb112
-
-bb112:		; preds = %bb104
-	load i8** %13, align 4		; <i8*>:72 [#uses=2]
-	load i8** %15, align 4		; <i8*>:73 [#uses=1]
-	icmp ult i8* %72, %73		; <i1>:74 [#uses=1]
-	br i1 %74, label %bb114, label %bb115
-
-bb114:		; preds = %bb112
-	load i8* %72, align 1		; <i8>:75 [#uses=1]
-	zext i8 %75 to i32		; <i32>:76 [#uses=1]
-	br label %bb119
-
-bb115:		; preds = %bb112
-	load i32 (...)*** %.pre-phi, align 4		; <i32 (...)**>:77 [#uses=1]
-	getelementptr i32 (...)** %77, i32 9		; <i32 (...)**>:78 [#uses=1]
-	load i32 (...)** %78, align 4		; <i32 (...)*>:79 [#uses=1]
-	bitcast i32 (...)* %79 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*		; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:80 [#uses=1]
-	invoke i32 %80( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
-			to label %bb119 unwind label %lpad		; <i32>:81 [#uses=1]
-
-bb119:		; preds = %bb115, %bb114, %bb104, %bb96, %bb95, %bb119.preheader
-	%__c79.0 = phi i32 [ %__c79.0.ph, %bb119.preheader ], [ %50, %bb95 ], [ %76, %bb114 ], [ %55, %bb96 ], [ -1, %bb104 ], [ %81, %bb115 ]		; <i32> [#uses=3]
-	%__s_addr.0 = phi i8* [ %__s, %bb119.preheader ], [ %41, %bb95 ], [ %57, %bb114 ], [ %41, %bb96 ], [ %57, %bb104 ], [ %57, %bb115 ]		; <i8*> [#uses=5]
-	load i32* %0, align 4		; <i32>:82 [#uses=2]
-	add i32 %82, 1		; <i32>:83 [#uses=2]
-	%.not = icmp sge i32 %83, %__n		; <i1> [#uses=1]
-	icmp eq i32 %__c79.0, -1		; <i1>:84 [#uses=3]
-	icmp eq i32 %__c79.0, %3		; <i1>:85 [#uses=2]
-	%or.cond = or i1 %84, %85		; <i1> [#uses=1]
-	%or.cond188 = or i1 %or.cond, %.not		; <i1> [#uses=1]
-	br i1 %or.cond188, label %bb141, label %bb84
-
-bb141:		; preds = %bb119
-	%.not194 = xor i1 %85, true		; <i1> [#uses=1]
-	%brmerge = or i1 %84, %.not194		; <i1> [#uses=1]
-	%.mux = select i1 %84, i32 2, i32 4		; <i32> [#uses=0]
-	br i1 %brmerge, label %bb162, label %bb146
-
-bb146:		; preds = %bb141
-	store i32 %83, i32* %0, align 4
-	load i8** %13, align 4		; <i8*>:86 [#uses=2]
-	load i8** %15, align 4		; <i8*>:87 [#uses=1]
-	icmp ult i8* %86, %87		; <i1>:88 [#uses=1]
-	br i1 %88, label %bb148, label %bb149
-
-bb148:		; preds = %bb146
-	getelementptr i8* %86, i32 1		; <i8*>:89 [#uses=1]
-	store i8* %89, i8** %13, align 4
-	ret %"struct.std::basic_istream<char,std::char_traits<char> >"* %this
-
-bb149:		; preds = %bb146
-	load i32 (...)*** %.pre-phi, align 4		; <i32 (...)**>:90 [#uses=1]
-	getelementptr i32 (...)** %90, i32 10		; <i32 (...)**>:91 [#uses=1]
-	load i32 (...)** %91, align 4		; <i32 (...)*>:92 [#uses=1]
-	bitcast i32 (...)* %92 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*		; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:93 [#uses=1]
-	invoke i32 %93( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
-			to label %bb162 unwind label %lpad		; <i32>:94 [#uses=0]
-
-bb162:		; preds = %bb149, %bb141, %entry
-	ret %"struct.std::basic_istream<char,std::char_traits<char> >"* %this
-
-lpad:		; preds = %bb149, %bb115, %bb102, %bb96, %bb82
-	%__s_addr.1 = phi i8* [ %__s, %bb82 ], [ %__s_addr.0, %bb149 ], [ %41, %bb96 ], [ %57, %bb102 ], [ %57, %bb115 ]		; <i8*> [#uses=0]
-	call void @__cxa_rethrow( ) noreturn 
-	unreachable
-}
-
-declare i8* @__cxa_begin_catch(i8*) nounwind 
-
-declare i8* @llvm.eh.exception() nounwind 
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind 
-
-declare void @__cxa_rethrow() noreturn 
-
-declare void @__cxa_end_catch()
-
-declare i32 @__gxx_personality_v0(...)
-
-declare void @_ZNSi6sentryC1ERSib(%"struct.std::basic_istream<char,std::char_traits<char> >::sentry"*, %"struct.std::basic_istream<char,std::char_traits<char> >"*, i8 zeroext )
-
-declare i8* @memchr(i8*, i32, i32) nounwind readonly 
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
-
-declare void @_ZNSt9basic_iosIcSt11char_traitsIcEE5clearESt12_Ios_Iostate(%"struct.std::basic_ios<char,std::char_traits<char> >"*, i32)
-
-declare extern_weak i32 @pthread_once(i32*, void ()*)
-
-declare extern_weak i8* @pthread_getspecific(i32)
-
-declare extern_weak i32 @pthread_setspecific(i32, i8*)
-
-declare extern_weak i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
-
-declare extern_weak i32 @pthread_cancel(i32)
-
-declare extern_weak i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct..0._11*)
-
-declare extern_weak i32 @pthread_key_create(i32*, void (i8*)*)
-
-declare extern_weak i32 @pthread_key_delete(i32)
-
-declare extern_weak i32 @pthread_mutexattr_init(%struct..0._11*)
-
-declare extern_weak i32 @pthread_mutexattr_settype(%struct..0._11*, i32)
-
-declare extern_weak i32 @pthread_mutexattr_destroy(%struct..0._11*)
diff --git a/test/Integer/BitArith.ll b/test/Integer/BitArith.ll
deleted file mode 100644
index 350a984..0000000
--- a/test/Integer/BitArith.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-declare void @"foo"(i31 %i, i63 %j, i10 %k)
-
-
-; foo test basic arith operations
-define void @"foo"(i31 %i, i63 %j, i10 %k)
-begin
-	%t1 = trunc i63 %j to i31 
-        %t2 = add i31 %t1, %i
-        %t20 = add i31 3, %t1
-        %t3 = zext i31 %i to i63
-        %t4 = sub i63 %t3, %j
-        %t40 = sub i63 %j, -100 
-        %t5 = mul i10 %k, 7
-        %t6 = sdiv i63 %j, -2
-        %t7 = udiv i63 %j, %t3
-        %t8 = urem i10 %k, 10
-        %t9 = srem i10 %k, -10
-	ret void
-end
-
diff --git a/test/Integer/BitBit.ll b/test/Integer/BitBit.ll
deleted file mode 100644
index 420bbe5..0000000
--- a/test/Integer/BitBit.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-declare void @"foo"(i31 %i, i33 %j)
-
-
-; foo test basic bitwise operations
-define void @"foo"(i31 %i, i33 %j)
-begin
-	%t1 = trunc i33 %j to i31 
-        %t2 = and i31 %t1, %i
-        %t3 = sext i31 %i to i33
-        %t4 = or i33 %t3, %j 
-        %t5 = xor i31 %t2, 7 
-        %t6 = shl i31 %i, 2
-        %t7 = trunc i31 %i to i8
-        %t8 = shl i8 %t7, 3
-        %t9 = lshr i33 %j, 31
-        %t7z = zext i8 %t7 to i33
-        %t10 = ashr i33 %j, %t7z
-	ret void
-end
-
diff --git a/test/Integer/BitCast.ll b/test/Integer/BitCast.ll
deleted file mode 100644
index 0bef023..0000000
--- a/test/Integer/BitCast.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-declare void @"foo"(i31 %i, i1280 %j, i1 %k, float %f)
-
-
-; foo test basic arith operations
-define void @"foo"(i31 %i, i1280 %j, i1 %k, float %f)
-begin
-	%t1 = trunc i1280 %j to i31
-        %t2 = trunc i31 %t1 to i1
- 
-        %t3 = zext i31 %i to i1280
-        %t4 = sext i31 %i to i1280
-
-        %t5 = fptoui float 0x400921FA00000000 to i31
-        %t6 = uitofp i31 %t5 to double
-
-        %t7 = fptosi double 0xC0934A456D5CFAAD to i28
-        %t8 = sitofp i8 -1 to double
-        %t9 = uitofp i8 255 to double
-        
-	ret void
-end
-
diff --git a/test/Integer/BitIcmp.ll b/test/Integer/BitIcmp.ll
deleted file mode 100644
index c224612..0000000
--- a/test/Integer/BitIcmp.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i55 @"simpleIcmp"(i55 %i0, i55 %j0)
-begin
-	%t1 = icmp eq i55 %i0, %j0
-	%t2 = icmp ne i55 %i0, %j0
-	%t3 = icmp ult i55 %i0, %j0
-        %t4 = icmp sgt i55 %i0, %j0
-	%t5 = icmp ule i55 %i0, %j0
-        %t6 = icmp sge i55 %i0, %j0
-
-	%t7 = icmp eq i55 %i0, 1098765432
-        %t8 = icmp ne i55 %i0, -31415926
-
-        %t9 = icmp ult i55 10000, %j0
-        %t10 = icmp sgt i55 -10000, %j0
-
-	ret i55 %i0
-end
-
-define i31 @"phitest"(i12 %i)
-begin
-
-HasArg:
-        %n1 = add i12 1, %i
-        br label %Continue
-        
-Continue:
-        %n = phi i12 [%n1, %HasArg], [%next, %Continue]
-        %next = add i12 1, %n
-        br label %Continue
-end
-
-define i18 @"select"(i18 %i)
-begin
-        %t = icmp sgt i18 %i, 100
-        %k = select i1 %t, i18 %i, i18 999
-        ret i18 %k
-end
-
diff --git a/test/Integer/BitMisc.ll b/test/Integer/BitMisc.ll
deleted file mode 100644
index 8ce4d4a..0000000
--- a/test/Integer/BitMisc.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-@MyVar     = external global i19
-@MyIntList = external global { i39 *, i19 }
-             external global i19      ; i19*:0
-
-@AConst    = constant i19 -123
-
-@AString   = constant [4 x i8] c"test"
-
-@ZeroInit  = global { [100 x i19 ], [40 x float ] } { [100 x i19] zeroinitializer,
-                                                      [40  x float] zeroinitializer }
-
-
-define i19 @"foo"(i19 %blah)
-begin
-	store i19 5, i19* @MyVar
-	%idx = getelementptr { i39 *, i19 } * @MyIntList, i64 0, i32 1
-  	store i19 12, i19* %idx
-  	ret i19 %blah
-end
diff --git a/test/Integer/cfgstructures_bt.ll b/test/Integer/cfgstructures_bt.ll
deleted file mode 100644
index 09aec1f..0000000
--- a/test/Integer/cfgstructures_bt.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-;; This is an irreducible flow graph
-
-
-define void @"irreducible"(i1 %cond)
-begin
-	br i1 %cond, label %X, label %Y
-
-X:
-	br label %Y
-Y:
-	br label %X
-end
-
-;; This is a pair of loops that share the same header
-
-define void @"sharedheader"(i1 %cond)
-begin
-	br label %A
-A:
-	br i1 %cond, label %X, label %Y
-
-X:
-	br label %A
-Y:
-	br label %A
-end
-
-;; This is a simple nested loop
-define void @"nested"(i1 %cond1, i1 %cond2, i1 %cond3)
-begin
-	br label %Loop1
-
-Loop1:
-	br label %Loop2
-
-Loop2:
-	br label %Loop3
-
-Loop3:
-	br i1 %cond3, label %Loop3, label %L3Exit
-
-L3Exit:
-	br i1 %cond2, label %Loop2, label %L2Exit
-
-L2Exit:
-	br i1 %cond1, label %Loop1, label %L1Exit
-
-L1Exit:
-	ret void
-end
-
diff --git a/test/Integer/forwardreftest_bt.ll b/test/Integer/forwardreftest_bt.ll
deleted file mode 100644
index 5d73eff..0000000
--- a/test/Integer/forwardreftest_bt.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-  %myty = type i55 
-  %myfn = type float (i55,double,i55,i16)
-  type i55(%myfn*)
-  type i55(i55)
-  type i55(i55(i55)*)
-
-  %thisfuncty = type i55 (i55) *
-
-declare void @F(%thisfuncty, %thisfuncty, %thisfuncty)
-
-; This function always returns zero
-define i55 @zarro(i55 %Func)
-begin
-Startup:
-    add i55 0, 10
-    ret i55 0 
-end
-
-define i55 @test(i55) 
-begin
-    call void @F(%thisfuncty @zarro, %thisfuncty @test, %thisfuncty @foozball)
-    ret i55 0
-end
-
-define i55 @foozball(i55)
-begin
-    ret i55 0
-end
-
diff --git a/test/Integer/globalredefinition_bt.ll b/test/Integer/globalredefinition_bt.ll
deleted file mode 100644
index b369b2a..0000000
--- a/test/Integer/globalredefinition_bt.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; Test forward references and redefinitions of globals
-
-@A = global i17* @B
-@B = global i17 7
-
-declare void @X()
-
-declare void @X()
-
-define void @X() {
-  ret void
-}
-
-declare void @X()
diff --git a/test/Integer/globalvars_bt.ll b/test/Integer/globalvars_bt.ll
deleted file mode 100644
index 5c43185..0000000
--- a/test/Integer/globalvars_bt.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-
-@MyVar     = external global i27
-@MyIntList = external global { \2 *, i27 }
-             external global i27      ; i27*:0
-
-@AConst    = constant i27 123
-
-@AString   = constant [4 x i8] c"test"
-
-@ZeroInit  = global { [100 x i27 ], [40 x float ] } { [100 x i27] zeroinitializer,
-                                                      [40  x float] zeroinitializer }
-
-
-define i27 @"foo"(i27 %blah)
-begin
-	store i27 5, i27 *@MyVar
-        %idx = getelementptr { \2 *, i27 } * @MyIntList, i64 0, i32 1
-  	store i27 12, i27* %idx
-  	ret i27 %blah
-end
-
diff --git a/test/Integer/indirectcall2_bt.ll b/test/Integer/indirectcall2_bt.ll
deleted file mode 100644
index 5b7c68d..0000000
--- a/test/Integer/indirectcall2_bt.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i63 @"test"(i63 %X)
-begin
-	ret i63 %X
-end
-
-define i63 @"fib"(i63 %n)
-begin
-  %T = icmp ult i63 %n, 2       ; {i1}:0
-  br i1 %T, label %BaseCase, label %RecurseCase
-
-RecurseCase:
-  %result = call i63 @test(i63 %n)
-  br label %BaseCase
-
-BaseCase:
-  %X = phi i63 [1, %0], [2, %RecurseCase]
-  ret i63 %X
-end
-
diff --git a/test/Integer/indirectcall_bt.ll b/test/Integer/indirectcall_bt.ll
deleted file mode 100644
index d586fca..0000000
--- a/test/Integer/indirectcall_bt.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-declare i32 @"atoi"(i8 *)
-
-define i63 @"fib"(i63 %n)
-begin
-  icmp ult i63 %n, 2       ; {i1}:1
-  br i1 %1, label %BaseCase, label %RecurseCase
-
-BaseCase:
-  ret i63 1
-
-RecurseCase:
-  %n2 = sub i63 %n, 2
-  %n1 = sub i63 %n, 1
-  %f2 = call i63(i63) * @fib(i63 %n2)
-  %f1 = call i63(i63) * @fib(i63 %n1)
-  %result = add i63 %f2, %f1
-  ret i63 %result
-end
-
-define i63 @"realmain"(i32 %argc, i8 ** %argv)
-begin
-  icmp eq i32 %argc, 2      ; {i1}:1
-  br i1 %1, label %HasArg, label %Continue
-HasArg:
-  ; %n1 = atoi(argv[1])
-  %n1 = add i32 1, 1
-  br label %Continue
-
-Continue:
-  %n = phi i32 [%n1, %HasArg], [1, %0]
-  %N = sext i32 %n to i63
-  %F = call i63(i63) *@fib(i63 %N)
-  ret i63 %F
-end
-
-define i63 @"trampoline"(i63 %n, i63(i63)* %fibfunc)
-begin
-  %F = call i63(i63) *%fibfunc(i63 %n)
-  ret i63 %F
-end
-
-define i32 @"main"()
-begin
-  %Result = call i63 @trampoline(i63 10, i63(i63) *@fib)
-  %Result2 = trunc i63 %Result to i32
-  ret i32 %Result2
-end
diff --git a/test/Integer/opaquetypes_bt.ll b/test/Integer/opaquetypes_bt.ll
deleted file mode 100644
index 5771342..0000000
--- a/test/Integer/opaquetypes_bt.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; This test case is used to test opaque type processing, forward references,
-; and recursive types.  Oh my.
-; 
-
-%SQ1 = type { i31 }
-%SQ2 = type { %ITy }
-%ITy = type i31
-
-
-%CCC = type { \2* }
-%BBB = type { \2*, \2 * }
-%AAA = type { \2*, {\2*}, [12x{\2*}], {[1x{\2*}]} }
-
-; Test numbered types
-type %CCC
-type %BBB
-%Composite = type { %0, %1 }
-
-; Test simple opaque type resolution...
-%i31ty = type i31
-
-; Perform a simple forward reference...
-%ty1 = type { %ty2, i31 }
-%ty2 = type float
-
-; Do a recursive type...
-%list = type { %list * }
-%listp = type { %listp } *
-
-; Do two mutually recursive types...
-%TyA = type { %ty2, %TyB * }
-%TyB = type { double, %TyA * }
-
-; A complex recursive type...
-%Y = type { {%Y*}, %Y* }
-%Z = type { { %Z * }, [12x%Z] *, {{{ %Z * }}} }
-
-; More ridiculous test cases...
-%A = type [ 123x %A*]
-%M = type %M (%M, %M) *
-%P = type %P*
-
-; Recursive ptrs
-%u = type %v*
-%v = type %u*
-
-; Test the parser for unnamed recursive types...
-%P1 = type \1 *
-%Y1 = type { { \3 * }, \2 * }
-%Z1 = type { { \3 * }, [12x\3] *, { { { \5 * } } } }
-
-
-
-
diff --git a/test/Integer/paramattrs_bt.ll b/test/Integer/paramattrs_bt.ll
deleted file mode 100644
index 6db9a53..0000000
--- a/test/Integer/paramattrs_bt.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-%ZFunTy = type i33(i8 zeroext)
-%SFunTy = type i33(i8 signext)
-
-declare signext i16 @"test"(i16 signext %arg)  
-declare zeroext i8  @"test2" (i16 zeroext %a2)  
-
-
-define i33 @main(i33 %argc, i8 **%argv) {
-    %val = trunc i33 %argc to i16
-    %res = call signext i16 (i16 signext) *@test(i16 signext %val) 
-    %two = add i16 %res, %res
-    %res2 = call zeroext i8 @test2(i16 zeroext %two )  
-    %retVal = sext i16 %two to i33
-    ret i33 %retVal
-}
diff --git a/test/Integer/prototype_bt.ll b/test/Integer/prototype_bt.ll
deleted file mode 100644
index 2236e8b..0000000
--- a/test/Integer/prototype_bt.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-declare i31 @"bar"(i31 %in) 
-
-define i31 @"foo"(i31 %blah)
-begin
-  %xx = call i31 @bar(i31 %blah)
-  ret i31 %xx
-end
-
diff --git a/test/Integer/recursivetype_bt.ll b/test/Integer/recursivetype_bt.ll
deleted file mode 100644
index d5ce3f5..0000000
--- a/test/Integer/recursivetype_bt.ll
+++ /dev/null
@@ -1,108 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-; This file contains the output from the following compiled C code:
-; typedef struct list {
-;   struct list *Next;
-;   i32 Data;
-; } list;
-;
-; // Iterative insert fn
-; void InsertIntoListTail(list **L, i32 Data) {
-;   while (*L)
-;     L = &(*L)->Next;
-;   *L = (list*)malloc(sizeof(list));
-;   (*L)->Data = Data;
-;   (*L)->Next = 0;
-; }
-;
-; // Recursive list search fn
-; list *FindData(list *L, i32 Data) {
-;   if (L == 0) return 0;
-;   if (L->Data == Data) return L;
-;   return FindData(L->Next, Data);
-; }
-;
-; void DoListStuff() {
-;   list *MyList = 0;
-;   InsertIntoListTail(&MyList, 100);
-;   InsertIntoListTail(&MyList, 12);
-;   InsertIntoListTail(&MyList, 42);
-;   InsertIntoListTail(&MyList, 1123);
-;   InsertIntoListTail(&MyList, 1213);
-;
-;   if (FindData(MyList, 75)) foundIt();
-;   if (FindData(MyList, 42)) foundIt();
-;   if (FindData(MyList, 700)) foundIt();
-; }
-
-%list = type { %list*, i36 }
-
-declare i8 *@"malloc"(i32)
-
-;;**********************
-;;**********************
-
-define void @"InsertIntoListTail"(%list** %L, i36 %Data)
-begin
-bb1:
-        %reg116 = load %list** %L                               ;;<%list*>
-        %cast1004 = inttoptr i64 0 to %list*                      ;;<%list*>
-        %cond1000 = icmp eq %list* %reg116, %cast1004             ;;<i1>
-        br i1 %cond1000, label %bb3, label %bb2
-
-bb2:
-        %reg117 = phi %list** [ %reg118, %bb2 ], [ %L, %bb1 ]   ;;<%list**>
-        %cast1010 = bitcast %list** %reg117 to %list***            ;;<%list***>
-        %reg118 = load %list*** %cast1010                       ;;<%list**>
-        %reg109 = load %list** %reg118                          ;;<%list*>
-        %cast1005 = inttoptr i64 0 to %list*                      ;;<%list*>
-        %cond1001 = icmp ne %list* %reg109, %cast1005             ;;<i1>
-        br i1 %cond1001, label %bb2, label %bb3
-
-bb3:
-        %reg119 = phi %list** [ %reg118, %bb2 ], [ %L, %bb1 ]   ;;<%list**>
-        %cast1006 = bitcast %list** %reg119 to i8**             ;;<i8**>
-        %reg111 = call i8* @malloc(i32 16)                  ;;<i8*>
-        store i8* %reg111, i8** %cast1006                 ;;<void>
-	%reg112 = ptrtoint i8* %reg111 to i64
-	%reg1002 = add i64 %reg112, 8
-        %reg1005 = inttoptr i64 %reg1002 to i8*             ;;<i8*>
-        %cast1008 = bitcast i8* %reg1005 to i36*                ;;<i36*>
-        store i36 %Data, i36* %cast1008                         ;;<void>
-        %cast1003 = inttoptr i64 0 to i64*                      ;;<i64*>
-        %cast1009 = bitcast i8* %reg111 to i64**              ;;<i64**>
-        store i64* %cast1003, i64** %cast1009               ;;<void>
-        ret void
-end
-
-define %list* @"FindData"(%list* %L, i36 %Data)
-begin
-bb1:
-        br label %bb2
-
-bb2:
-        %reg115 = phi %list* [ %reg116, %bb6 ], [ %L, %bb1 ]    ;;<%list*>
-        %cast1014 = inttoptr i64 0 to %list*                      ;;<%list*>
-        %cond1011 = icmp ne %list* %reg115, %cast1014             ;;<i1>
-        br i1 %cond1011, label %bb4, label %bb3
-
-bb3:
-        ret %list* null
-
-bb4:
-	%idx = getelementptr %list* %reg115, i64 0, i32 1                  ;;<i36>
-        %reg111 = load i36* %idx
-        %cond1013 = icmp ne i36 %reg111, %Data                    ;;<i1>
-        br i1 %cond1013, label %bb6, label %bb5
-
-bb5:
-        ret %list* %reg115
-
-bb6:
-	%idx2 = getelementptr %list* %reg115, i64 0, i32 0                  ;;<%list*>
-        %reg116 = load %list** %idx2
-        br label %bb2
-end
diff --git a/test/Integer/simplecalltest_bt.ll b/test/Integer/simplecalltest_bt.ll
deleted file mode 100644
index 45dc0f1..0000000
--- a/test/Integer/simplecalltest_bt.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-%FunTy = type i31(i31)
-
-
-define void @"invoke"(%FunTy *%x)
-begin
-	%foo = call %FunTy* %x(i31 123)
-	ret void
-end
-
-define i31 @"main"(i31 %argc, i8 **%argv, i8 **%envp)
-begin
-        %retval = call i31 (i31) *@test(i31 %argc)
-        %two    = add i31 %retval, %retval
-	%retval2 = call i31 @test(i31 %argc)
-
-	%two2 = add i31 %two, %retval2
-	call void @invoke (%FunTy* @test)
-        ret i31 %two2
-end
-
-define i31 @"test"(i31 %i0)
-begin
-    ret i31 %i0
-end
diff --git a/test/Integer/small_bt.ll b/test/Integer/small_bt.ll
deleted file mode 100644
index 00fcace..0000000
--- a/test/Integer/small_bt.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-%x = type i19
-
-
-define i19 @"foo"(i19 %in) 
-begin
-label: 
-  ret i19 2
-end
-
diff --git a/test/Integer/testalloca_bt.ll b/test/Integer/testalloca_bt.ll
deleted file mode 100644
index e8e73c5..0000000
--- a/test/Integer/testalloca_bt.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-%inners = type {float, {i8 } }
-%struct = type { i33 , {float, {i8 } } , i64 }
-
-
-define i33 @testfunction(i33 %i0, i33 %j0)
-begin
-    alloca i8, i32 5
-    %ptr = alloca i33                       ; yields {i33*}:ptr
-    store i33 3, i33* %ptr                  ; yields {void}
-    %val = load i33* %ptr                   ; yields {i33}:val = i33 %3
-
-    %sptr = alloca %struct                  ; yields {%struct*}:sptr
-    %nsptr = getelementptr %struct * %sptr, i64 0, i32 1  ; yields {inners*}:nsptr
-    %ubsptr = getelementptr %inners * %nsptr, i64 0, i32 1  ; yields {{i8}*}:ubsptr
-    %idx = getelementptr {i8} * %ubsptr, i64 0, i32 0
-    store i8 4, i8* %idx
-    
-    %fptr = getelementptr %struct * %sptr, i64 0, i32 1, i32 0  ; yields {float*}:fptr
-    store float 4.0, float * %fptr
-    
-    ret i33 3
-end
-
diff --git a/test/Integer/testarith_bt.ll b/test/Integer/testarith_bt.ll
deleted file mode 100644
index 0820399..0000000
--- a/test/Integer/testarith_bt.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i31 @"simpleArith"(i31 %i0, i31 %j0)
-begin
-	%t1 = add i31 %i0, %j0
-	%t2 = sub i31 %i0, %j0
-	%t3 = mul i31 %t1, %t2
-        %t4 = udiv i31 %t1, %t2
-        %t5 = sdiv i31 %t1, %t2
-        %t6 = urem i31 %t1, %t2
-        %t7 = srem i31 %t1, %t2
-        %t8 = shl  i31 %t1, 9
-        %t9 = lshr i31 %t1, 9
-        %t10= ashr i31 %t1, 9
-        %f1 = sitofp i31 %t1 to float
-        %f2 = fdiv float 4.0, %f1
-	ret i31 %t3
-end
diff --git a/test/Integer/testconstants_bt.ll b/test/Integer/testconstants_bt.ll
deleted file mode 100644
index 8ca49cf..0000000
--- a/test/Integer/testconstants_bt.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-@somestr = constant [11x i8] c"hello world"
-@array   = constant [2 x i55] [ i55 12, i55 52 ]
-           constant { i55, i55 } { i55 4, i55 3 }
-
- 
-define [2 x i55]* @testfunction(i55 %i0, i55 %j0)
-begin
-	ret [2x i55]* @array
-end
-
-define  i8* @otherfunc(i55, double)
-begin
-	%somestr = getelementptr [11x i8]* @somestr, i64 0, i64 0
-	ret i8* %somestr
-end
-
-define i8* @yetanotherfunc(i55, double)
-begin
-	ret i8* null            ; Test null
-end
-
-define i55 @negativeUnsigned() {
-        ret i55 -1
-}
-
-define i55 @largeSigned() {
-       ret i55 3900000000
-}
diff --git a/test/Integer/testicmp_bt.ll b/test/Integer/testicmp_bt.ll
deleted file mode 100644
index 40a2465..0000000
--- a/test/Integer/testicmp_bt.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i31 @"simpleIcmp"(i31 %i0, i31 %j0)
-begin
-	%t1 = icmp eq i31 %i0, %j0
-	%t2 = icmp ne i31 %i0, %j0
-	%t3 = icmp ult i31 %i0, %j0
-        %t4 = icmp sgt i31 %i0, %j0
-	%t5 = icmp ule i31 %i0, %j0
-        %t6 = icmp sge i31 %i0, %j0
-
-	%t7 = icmp eq i31 %i0, 1098765432
-        %t8 = icmp ne i31 %i0, -31415926
-
-        %t9 = icmp ult i31 10000, %j0
-        %t10 = icmp sgt i31 -10000, %j0
-
-
-	ret i31 %i0
-end
diff --git a/test/Integer/testlogical_bt.ll b/test/Integer/testlogical_bt.ll
deleted file mode 100644
index a2c927d..0000000
--- a/test/Integer/testlogical_bt.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i31 @"simpleAdd"(i31 %i0, i31 %j0)
-begin
-	%t1 = xor i31 %i0, %j0
-	%t2 = or i31 %i0, %j0
-	%t3 = and i31 %t1, %t2
-	ret i31 %t3
-end
-
diff --git a/test/Integer/testlogical_new_bt.ll b/test/Integer/testlogical_new_bt.ll
deleted file mode 100644
index 49a26dc..0000000
--- a/test/Integer/testlogical_new_bt.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i31 @"simpleAdd"(i31 %i0, i31 %j0)
-begin
-	%t1 = xor i31 %i0, %j0
-	%t2 = or i31 %i0, %j0
-	%t3 = and i31 %t1, %t2
-        %t4 = shl i31 %i0, 2
-        %t5 = ashr i31 %i0, 2
-        %t6 = lshr i31 %j0, 22
-	ret i31 %t3
-end
diff --git a/test/Integer/testswitch_bt.ll b/test/Integer/testswitch_bt.ll
deleted file mode 100644
index bf7cdc5..0000000
--- a/test/Integer/testswitch_bt.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-  %i35 = type i35
-
-
-define i35 @"squared"(%i35 %i0)
-begin
-	switch i35 %i0, label %Default [ 
-		i35 1, label %Case1
-		i35 2, label %Case2
-		i35 4, label %Case4 ]
-
-Default:
-    ret i35 -1                      ; Unrecognized input value
-
-Case1:
-    ret i35 1
-Case2:
-    ret i35 4
-Case4:
-    ret i35 16
-end
diff --git a/test/Integer/testvarargs_bt.ll b/test/Integer/testvarargs_bt.ll
deleted file mode 100644
index 3227d14..0000000
--- a/test/Integer/testvarargs_bt.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-declare i31 @"printf"(i8*, ...)   ;; Prototype for: i32 __builtin_printf(const char*, ...)
-
-define i31 @"testvarar"()
-begin
-	call i31(i8*, ...) *@printf(i8 * null, i31 12, i8 42)
-	ret i31 %1
-end
-
-
diff --git a/test/MC/AsmParser/directive_comm.s b/test/MC/AsmParser/directive_comm.s
index 6cc7937..5e60ab7 100644
--- a/test/MC/AsmParser/directive_comm.s
+++ b/test/MC/AsmParser/directive_comm.s
@@ -3,6 +3,8 @@
 # CHECK: TEST0:
 # CHECK: .comm a,6,2
 # CHECK: .comm b,8
+# CHECK: .comm c,8
 TEST0:  
         .comm a, 4+2, 2
         .comm b,8
+        .common c,8
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index a36ba25..472748f 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -1128,3 +1128,11 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
 // CHECK: strq
 // CHECK: encoding: [0x48,0x0f,0x00,0xc8]
 	str %rax
+
+// CHECK: movd %rdi, %xmm0
+// CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7]
+	movq %rdi,%xmm0
+
+// CHECK: movd %rdi, %xmm0
+// CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7]
+	movd %rdi,%xmm0
diff --git a/test/Makefile b/test/Makefile
index e38226a..4e34e72 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -171,15 +171,21 @@ site.exp: FORCE
 	@test ! -f site.exp || mv site.exp site.bak
 	@mv site.tmp site.exp
 
+ifeq ($(DISABLE_ASSERTIONS),1)
+ENABLE_ASSERTIONS=0
+else
+ENABLE_ASSERTIONS=1
+endif
+
 lit.site.cfg: site.exp
 	@echo "Making LLVM 'lit.site.cfg' file..."
 	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > lit.tmp
 	@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> lit.tmp
-	@$(ECHOPATH) s=@LLVM_BUILD_MODE@=$(BuildMode)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> lit.tmp
 	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
+	@$(ECHOPATH) s=@LLVM_ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
 	@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
 	@-rm -f lit.tmp
 
diff --git a/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll b/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
index 43462fa..8910bda 100644
--- a/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
+++ b/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
@@ -1,7 +1,6 @@
 ; RUN:  opt < %s -adce
 
-define i32 @"main"(i32 %argc)
-begin
+define i32 @"main"(i32 %argc) {
 	br label %2
 
 	%retval = phi i32 [ %argc, %2 ]		; <i32>	[#uses=2]
@@ -9,5 +8,4 @@ begin
 	ret i32 %two
 
 	br label %1
-end
-
+}
diff --git a/test/Transforms/DeadStoreElimination/crash.ll b/test/Transforms/DeadStoreElimination/crash.ll
index bb279cd..148695f 100644
--- a/test/Transforms/DeadStoreElimination/crash.ll
+++ b/test/Transforms/DeadStoreElimination/crash.ll
@@ -36,11 +36,11 @@ bb14:                                             ; preds = %bb4
   %6 = getelementptr inbounds i16* %2, i64 undef  ; <i16*> [#uses=1]
   store i16 undef, i16* %6, align 2
   %7 = getelementptr inbounds i8* %5, i64 undef   ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i64(i8* %7, i8* undef, i64 undef, i32 1) nounwind
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i32 1, i1 false)
   unreachable
 }
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 
 ; rdar://7635088
diff --git a/test/Transforms/DeadStoreElimination/lifetime.ll b/test/Transforms/DeadStoreElimination/lifetime.ll
index 2b5cc5a..6785653 100644
--- a/test/Transforms/DeadStoreElimination/lifetime.ll
+++ b/test/Transforms/DeadStoreElimination/lifetime.ll
@@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
 
 define void @test1() {
 ; CHECK: @test1
@@ -14,7 +14,7 @@ define void @test1() {
   call void @llvm.lifetime.end(i64 1, i8* %A)
 ; CHECK: lifetime.end
 
-  call void @llvm.memset.i8(i8* %A, i8 0, i8 -1, i32 0)
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
 ; CHECK-NOT: memset
 
   ret void
diff --git a/test/Transforms/DeadStoreElimination/memintrinsics.ll b/test/Transforms/DeadStoreElimination/memintrinsics.ll
index e31e9fa..d5c5365 100644
--- a/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ b/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -S -dse < %s | FileCheck %s
 
-declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
-declare void @llvm.memmove.i8(i8*, i8*, i8, i32)
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
 
 define void @test1() {
 ; CHECK: @test1
@@ -12,7 +12,7 @@ define void @test1() {
   store i8 0, i8* %A  ;; Written to by memcpy
 ; CHECK-NOT: store
 
-  call void @llvm.memcpy.i8(i8* %A, i8* %B, i8 -1, i32 0)
+  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
@@ -26,7 +26,7 @@ define void @test2() {
   store i8 0, i8* %A  ;; Written to by memmove
 ; CHECK-NOT: store
 
-  call void @llvm.memmove.i8(i8* %A, i8* %B, i8 -1, i32 0)
+  call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
@@ -40,7 +40,7 @@ define void @test3() {
   store i8 0, i8* %A  ;; Written to by memset
 ; CHECK-NOT: store
 
-  call void @llvm.memset.i8(i8* %A, i8 0, i8 -1, i32 0)
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 23576da..5f143fc 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -3,8 +3,6 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
 declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
 
 define void @test1(i32* %Q, i32* %P) {
@@ -65,7 +63,7 @@ define void @test5(i32* %Q) {
 ; alias).
 define void @test6(i32 *%p, i8 *%q) {
   store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memset.i64(i8* %q, i8 42, i64 900, i32 1)
+  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i32 1, i1 false)
   store i32 30, i32* %p, align 4
   ret void
 ; CHECK: @test6
@@ -76,7 +74,7 @@ define void @test6(i32 *%p, i8 *%q) {
 ; alias).
 define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
   store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memcpy.i64(i8* %q, i8* %r, i64 900, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i32 1, i1 false)
   store i32 30, i32* %p, align 4
   ret void
 ; CHECK: @test7
@@ -184,8 +182,8 @@ define void @test14(i32* %Q) {
 
 ;; Fully dead overwrite of memcpy.
 define void @test15(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK: @test15
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -194,8 +192,8 @@ define void @test15(i8* %P, i8* %Q) nounwind ssp {
 
 ;; Full overwrite of smaller memcpy.
 define void @test16(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 8, i32 1)
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK: @test16
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -204,8 +202,8 @@ define void @test16(i8* %P, i8* %Q) nounwind ssp {
 
 ;; Overwrite of memset by memcpy.
 define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
-  tail call void @llvm.memset.i64(i8* %P, i8 42, i64 8, i32 1)
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK: @test17
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -228,8 +226,8 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp {
 ; A = B
 ; A = A
 define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %R, i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
   ret void
 ; CHECK: @test18
 ; CHECK-NEXT: call void @llvm.memcpy
diff --git a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
deleted file mode 100644
index faac118..0000000
--- a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
-
-%struct.X = type { i32*, i32* }
-
-declare i32 @g(i32*) readnone
-
-define i32 @f() {
-; CHECK: @f() readnone
-	%x = alloca i32		; <i32*> [#uses=2]
-	store i32 0, i32* %x
-	%y = call i32 @g(i32* %x)		; <i32> [#uses=1]
-	ret i32 %y
-}
-
-define i32 @foo() nounwind {
-; CHECK: @foo() nounwind readonly
-entry:
-  %y = alloca %struct.X                           ; <%struct.X*> [#uses=2]
-  %x = alloca %struct.X                           ; <%struct.X*> [#uses=2]
-  %j = alloca i32                                 ; <i32*> [#uses=2]
-  %i = alloca i32                                 ; <i32*> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  store i32 0, i32* %i, align 4
-  store i32 1, i32* %j, align 4
-  %0 = getelementptr inbounds %struct.X* %y, i32 0, i32 0 ; <i32**> [#uses=1]
-  store i32* %i, i32** %0, align 8
-  %1 = getelementptr inbounds %struct.X* %x, i32 0, i32 1 ; <i32**> [#uses=1]
-  store i32* %j, i32** %1, align 8
-  %x1 = bitcast %struct.X* %x to i8*              ; <i8*> [#uses=2]
-  %y2 = bitcast %struct.X* %y to i8*              ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i64(i8* %x1, i8* %y2, i64 8, i32 1)
-  %2 = bitcast i8* %x1 to i32**                   ; <i32**> [#uses=1]
-  %3 = load i32** %2, align 8                     ; <i32*> [#uses=1]
-  %4 = load i32* %3, align 4                      ; <i32> [#uses=1]
-  br label %return
-
-return:                                           ; preds = %entry
-  ret i32 %4
-}
-
-define i32 @t(i32 %a, i32 %b, i32 %c) nounwind {
-; CHECK: @t(i32 %a, i32 %b, i32 %c) nounwind readnone
-entry:
-  %a.addr = alloca i32                            ; <i32*> [#uses=3]
-  %c.addr = alloca i32                            ; <i32*> [#uses=2]
-  store i32 %a, i32* %a.addr
-  store i32 %c, i32* %c.addr
-  %tmp = load i32* %a.addr                        ; <i32> [#uses=1]
-  %tobool = icmp ne i32 %tmp, 0                   ; <i1> [#uses=1]
-  br i1 %tobool, label %if.then, label %if.else
-
-if.then:                                          ; preds = %entry
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ] ; <i32*> [#uses=1]
-  %tmp2 = load i32* %p.0                          ; <i32> [#uses=1]
-  ret i32 %tmp2
-}
-
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
diff --git a/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll b/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
deleted file mode 100644
index 9a75e1a..0000000
--- a/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt < %s -gvn -dse -S | grep {call.*memcpy} | count 1
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin8"
-	%struct.ggFrame3 = type { %struct.ggPoint3, %struct.ggONB3 }
-	%struct.ggHMatrix3 = type { [4 x [4 x double]] }
-	%struct.ggONB3 = type { %struct.ggPoint3, %struct.ggPoint3, %struct.ggPoint3 }
-	%struct.ggPoint3 = type { [3 x double] }
-	%struct.ggQuaternion = type { [4 x double], i32, %struct.ggHMatrix3 }
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind 
-
-define void @_Z10ggCRSplineRK8ggFrame3S1_S1_S1_d(%struct.ggFrame3* noalias sret  %agg.result, %struct.ggFrame3* %f0, %struct.ggFrame3* %f1, %struct.ggFrame3* %f2, %struct.ggFrame3* %f3, double %t) nounwind  {
-entry:
-	%qresult = alloca %struct.ggQuaternion		; <%struct.ggQuaternion*> [#uses=1]
-	%tmp = alloca %struct.ggONB3		; <%struct.ggONB3*> [#uses=2]
-	call void @_ZN12ggQuaternion7getONB3Ev( %struct.ggONB3* noalias sret  %tmp, %struct.ggQuaternion* %qresult ) nounwind 
-	%tmp1.i = getelementptr %struct.ggFrame3* %agg.result, i32 0, i32 1		; <%struct.ggONB3*> [#uses=1]
-	%tmp13.i = bitcast %struct.ggONB3* %tmp1.i to i8*		; <i8*> [#uses=1]
-	%tmp24.i = bitcast %struct.ggONB3* %tmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i64( i8* %tmp13.i, i8* %tmp24.i, i64 72, i32 8 ) nounwind 
-	ret void
-}
-
-declare void @_ZN12ggQuaternion7getONB3Ev(%struct.ggONB3* noalias sret , %struct.ggQuaternion*) nounwind 
diff --git a/test/Transforms/GVN/2008-02-26-MemCpySize.ll b/test/Transforms/GVN/2008-02-26-MemCpySize.ll
deleted file mode 100644
index 6ed8a76..0000000
--- a/test/Transforms/GVN/2008-02-26-MemCpySize.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: opt < %s -gvn -dse -S | grep {call.*memcpy.*cell} | count 2
-; PR2099
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin9"
-	%struct.s = type { [11 x i8], i32 }
-@.str = internal constant [11 x i8] c"0123456789\00"		; <[11 x i8]*> [#uses=1]
-@cell = weak global %struct.s zeroinitializer		; <%struct.s*> [#uses=2]
-
-declare i32 @check(%struct.s* byval  %p) nounwind
-
-declare i32 @strcmp(i8*, i8*) nounwind readonly 
-
-define i32 @main() noreturn nounwind  {
-entry:
-	%p = alloca %struct.s, align 8		; <%struct.s*> [#uses=2]
-	store i32 99, i32* getelementptr (%struct.s* @cell, i32 0, i32 1), align 4
-	call void @llvm.memcpy.i32( i8* getelementptr (%struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0), i32 11, i32 1 )
-	%tmp = getelementptr %struct.s* %p, i32 0, i32 0, i32 0		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i64( i8* %tmp, i8* getelementptr (%struct.s* @cell, i32 0, i32 0, i32 0), i64 16, i32 8 )
-	%tmp1.i = getelementptr %struct.s* %p, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp2.i = load i32* %tmp1.i, align 4		; <i32> [#uses=1]
-	%tmp3.i = icmp eq i32 %tmp2.i, 99		; <i1> [#uses=1]
-	br i1 %tmp3.i, label %bb5.i, label %bb
-
-bb5.i:		; preds = %entry
-	%tmp91.i = call i32 @strcmp( i8* %tmp, i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0) ) nounwind readonly 		; <i32> [#uses=1]
-	%tmp53 = icmp eq i32 %tmp91.i, 0		; <i1> [#uses=1]
-	br i1 %tmp53, label %bb7, label %bb
-
-bb:		; preds = %bb5.i, %entry
-	call void @abort( ) noreturn nounwind 
-	unreachable
-
-bb7:		; preds = %bb5.i
-	call void @exit( i32 0 ) noreturn nounwind 
-	unreachable
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
-
-declare void @abort() noreturn nounwind 
-
-declare void @exit(i32) noreturn nounwind 
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind 
diff --git a/test/Transforms/GVN/2011-04-27-phioperands.ll b/test/Transforms/GVN/2011-04-27-phioperands.ll
new file mode 100644
index 0000000..6e5075d
--- /dev/null
+++ b/test/Transforms/GVN/2011-04-27-phioperands.ll
@@ -0,0 +1,106 @@
+; RUN: opt %s -gvn -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+
+@nuls = external global [10 x i8]
+
+define fastcc void @p_ere() nounwind {
+entry:
+  br label %"<bb 5>"
+
+"<L18>.i":
+  br i1 undef, label %"<bb 3>.i30.i", label %doemit.exit51.i
+
+"<bb 3>.i30.i":
+  unreachable
+
+doemit.exit51.i:
+  br label %"<bb 53>.i"
+
+"<L19>.i":
+  br i1 undef, label %"<bb 3>.i55.i", label %doemit.exit76.i
+
+"<bb 3>.i55.i":
+  unreachable
+
+doemit.exit76.i:
+  br label %"<bb 53>.i"
+
+"<L98>.i":
+  store i8* getelementptr inbounds ([10 x i8]* @nuls, i64 0, i64 0), i8** undef, align 8
+  br label %"<bb 53>.i"
+
+"<L99>.i":
+  br label %"<bb 53>.i"
+
+"<L24>.i":
+  br i1 undef, label %"<bb 53>.i", label %"<bb 35>.i"
+
+"<bb 35>.i":
+  br label %"<bb 53>.i"
+
+"<L28>.i":
+  br label %"<bb 53>.i"
+
+"<L29>.i":
+  br label %"<bb 53>.i"
+
+"<L39>.i":
+  br label %"<bb 53>.i"
+
+"<bb 53>.i":
+  %wascaret_2.i = phi i32 [ 0, %"<L39>.i" ], [ 0, %"<L29>.i" ], [ 0, %"<L28>.i" ], [ 0, %"<bb 35>.i" ], [ 0, %"<L99>.i" ], [ 0, %"<L98>.i" ], [ 0, %doemit.exit76.i ], [ 1, %doemit.exit51.i ], [ 0, %"<L24>.i" ]
+  %D.5496_84.i = load i8** undef, align 8
+  br i1 undef, label %"<bb 54>.i", label %"<bb 5>"
+
+"<bb 54>.i":
+  br i1 undef, label %"<bb 5>", label %"<bb 58>.i"
+
+"<bb 58>.i":
+  br i1 undef, label %"<bb 64>.i", label %"<bb 59>.i"
+
+"<bb 59>.i":
+  br label %"<bb 64>.i"
+
+"<bb 64>.i":
+  switch i32 undef, label %"<bb 5>" [
+    i32 42, label %"<L54>.i"
+    i32 43, label %"<L55>.i"
+    i32 63, label %"<L56>.i"
+    i32 123, label %"<bb 5>.i258.i"
+  ]
+
+"<L54>.i":
+  br i1 undef, label %"<bb 3>.i105.i", label %doemit.exit127.i
+
+"<bb 3>.i105.i":
+  unreachable
+
+doemit.exit127.i:
+  unreachable
+
+"<L55>.i":
+  br i1 undef, label %"<bb 3>.i157.i", label %"<bb 5>"
+
+"<bb 3>.i157.i":
+  unreachable
+
+"<L56>.i":
+  br label %"<bb 5>"
+
+"<bb 5>.i258.i":
+  unreachable
+
+"<bb 5>":
+  switch i32 undef, label %"<L39>.i" [
+    i32 36, label %"<L19>.i"
+    i32 94, label %"<L18>.i"
+    i32 124, label %"<L98>.i"
+    i32 42, label %"<L99>.i"
+    i32 43, label %"<L99>.i"
+    i32 46, label %"<L24>.i"
+    i32 63, label %"<L99>.i"
+    i32 91, label %"<L28>.i"
+    i32 92, label %"<L29>.i"
+  ]
+}
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll
index 1d50205..dba9d81 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/nonescaping-malloc.ll
@@ -20,13 +20,13 @@ target triple = "x86_64-apple-darwin10.0"
 
 define %"struct.llvm::StringMapEntry<void*>"* @_Z3fooRN4llvm9StringMapIPvNS_15MallocAllocatorEEEPKc(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, i8* %P) ssp {
 entry:
-  %tmp = alloca %"struct.llvm::StringRef", align 8 ; <%"struct.llvm::StringRef"*> [#uses=3]
-  %tmp.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 0 ; <i8**> [#uses=1]
+  %tmp = alloca %"struct.llvm::StringRef", align 8
+  %tmp.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 0
   store i8* %P, i8** %tmp.i, align 8
-  %tmp1.i = call i64 @strlen(i8* %P) nounwind readonly ; <i64> [#uses=1]
-  %tmp2.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 1 ; <i64*> [#uses=1]
+  %tmp1.i = call i64 @strlen(i8* %P) nounwind readonly
+  %tmp2.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 1
   store i64 %tmp1.i, i64* %tmp2.i, align 8
-  %tmp1 = call %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, %"struct.llvm::StringRef"* %tmp) ssp ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=1]
+  %tmp1 = call %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, %"struct.llvm::StringRef"* %tmp) ssp
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp1
 }
 
@@ -34,75 +34,75 @@ declare i64 @strlen(i8* nocapture) nounwind readonly
 
 declare noalias i8* @malloc(i64) nounwind
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
 declare i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"*, i64, i64)
 
 define linkonce_odr %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, %"struct.llvm::StringRef"* nocapture %Key) ssp align 2 {
 entry:
-  %elt = bitcast %"struct.llvm::StringRef"* %Key to i64* ; <i64*> [#uses=1]
-  %val = load i64* %elt                           ; <i64> [#uses=3]
-  %tmp = getelementptr inbounds %"struct.llvm::StringRef"* %Key, i64 0, i32 1 ; <i64*> [#uses=1]
-  %val2 = load i64* %tmp                          ; <i64> [#uses=2]
-  %tmp2.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0 ; <%"struct.llvm::StringMapImpl"*> [#uses=1]
-  %tmp3.i = tail call i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"* %tmp2.i, i64 %val, i64 %val2) ; <i32> [#uses=1]
-  %tmp4.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 0 ; <%"struct.llvm::StringMapImpl::ItemBucket"**> [#uses=1]
-  %tmp5.i = load %"struct.llvm::StringMapImpl::ItemBucket"** %tmp4.i, align 8 ; <%"struct.llvm::StringMapImpl::ItemBucket"*> [#uses=1]
-  %tmp6.i = zext i32 %tmp3.i to i64               ; <i64> [#uses=1]
-  %tmp7.i = getelementptr inbounds %"struct.llvm::StringMapImpl::ItemBucket"* %tmp5.i, i64 %tmp6.i, i32 1 ; <%"struct.llvm::StringMapEntryBase"**> [#uses=2]
-  %tmp8.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8 ; <%"struct.llvm::StringMapEntryBase"*> [#uses=3]
-  %tmp9.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, null ; <i1> [#uses=1]
-  %tmp13.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*) ; <i1> [#uses=1]
-  %or.cond.i = or i1 %tmp9.i, %tmp13.i            ; <i1> [#uses=1]
+  %elt = bitcast %"struct.llvm::StringRef"* %Key to i64*
+  %val = load i64* %elt
+  %tmp = getelementptr inbounds %"struct.llvm::StringRef"* %Key, i64 0, i32 1
+  %val2 = load i64* %tmp
+  %tmp2.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0
+  %tmp3.i = tail call i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"* %tmp2.i, i64 %val, i64 %val2)
+  %tmp4.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 0
+  %tmp5.i = load %"struct.llvm::StringMapImpl::ItemBucket"** %tmp4.i, align 8
+  %tmp6.i = zext i32 %tmp3.i to i64
+  %tmp7.i = getelementptr inbounds %"struct.llvm::StringMapImpl::ItemBucket"* %tmp5.i, i64 %tmp6.i, i32 1
+  %tmp8.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
+  %tmp9.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, null
+  %tmp13.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*)
+  %or.cond.i = or i1 %tmp9.i, %tmp13.i
   br i1 %or.cond.i, label %bb4.i, label %bb6.i
 
 bb4.i:                                            ; preds = %entry
-  %tmp41.i = inttoptr i64 %val to i8*             ; <i8*> [#uses=2]
-  %tmp4.i35.i = getelementptr inbounds i8* %tmp41.i, i64 %val2 ; <i8*> [#uses=1]
-  %tmp.i.i = ptrtoint i8* %tmp4.i35.i to i64      ; <i64> [#uses=1]
-  %tmp1.i.i = trunc i64 %tmp.i.i to i32           ; <i32> [#uses=1]
-  %tmp3.i.i = trunc i64 %val to i32               ; <i32> [#uses=1]
-  %tmp4.i.i = sub i32 %tmp1.i.i, %tmp3.i.i        ; <i32> [#uses=3]
-  %tmp5.i.i = add i32 %tmp4.i.i, 17               ; <i32> [#uses=1]
-  %tmp8.i.i = zext i32 %tmp5.i.i to i64           ; <i64> [#uses=1]
-  %tmp.i20.i.i = tail call noalias i8* @malloc(i64 %tmp8.i.i) nounwind ; <i8*> [#uses=7]
-  %tmp10.i.i = bitcast i8* %tmp.i20.i.i to %"struct.llvm::StringMapEntry<void*>"* ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=2]
-  %tmp12.i.i = icmp eq i8* %tmp.i20.i.i, null     ; <i1> [#uses=1]
+  %tmp41.i = inttoptr i64 %val to i8*
+  %tmp4.i35.i = getelementptr inbounds i8* %tmp41.i, i64 %val2
+  %tmp.i.i = ptrtoint i8* %tmp4.i35.i to i64
+  %tmp1.i.i = trunc i64 %tmp.i.i to i32
+  %tmp3.i.i = trunc i64 %val to i32
+  %tmp4.i.i = sub i32 %tmp1.i.i, %tmp3.i.i
+  %tmp5.i.i = add i32 %tmp4.i.i, 17
+  %tmp8.i.i = zext i32 %tmp5.i.i to i64
+  %tmp.i20.i.i = tail call noalias i8* @malloc(i64 %tmp8.i.i) nounwind
+  %tmp10.i.i = bitcast i8* %tmp.i20.i.i to %"struct.llvm::StringMapEntry<void*>"*
+  %tmp12.i.i = icmp eq i8* %tmp.i20.i.i, null
   br i1 %tmp12.i.i, label %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i, label %bb.i.i
 
 bb.i.i:                                           ; preds = %bb4.i
-  %tmp.i.i.i.i = bitcast i8* %tmp.i20.i.i to i32* ; <i32*> [#uses=1]
+  %tmp.i.i.i.i = bitcast i8* %tmp.i20.i.i to i32*
   store i32 %tmp4.i.i, i32* %tmp.i.i.i.i, align 4
-  %tmp1.i19.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8 ; <i8*> [#uses=1]
-  %0 = bitcast i8* %tmp1.i19.i.i to i8**          ; <i8**> [#uses=1]
+  %tmp1.i19.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8
+  %0 = bitcast i8* %tmp1.i19.i.i to i8**
   store i8* null, i8** %0, align 8
   br label %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
 
-_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i: ; preds = %bb4.i, %bb.i.i
-  %tmp.i18.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 16 ; <i8*> [#uses=1]
-  %tmp15.i.i = zext i32 %tmp4.i.i to i64          ; <i64> [#uses=2]
-  tail call void @llvm.memcpy.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i32 1) nounwind
-  %tmp.i18.sum.i.i = add i64 %tmp15.i.i, 16       ; <i64> [#uses=1]
-  %tmp17.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 %tmp.i18.sum.i.i ; <i8*> [#uses=1]
+_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i: ; preds = %bb.i.i, %bb4.i
+  %tmp.i18.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 16
+  %tmp15.i.i = zext i32 %tmp4.i.i to i64
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i32 1, i1 false)
+  %tmp.i18.sum.i.i = add i64 %tmp15.i.i, 16
+  %tmp17.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 %tmp.i18.sum.i.i
   store i8 0, i8* %tmp17.i.i, align 1
-  %tmp.i.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8 ; <i8*> [#uses=1]
-  %1 = bitcast i8* %tmp.i.i.i to i8**             ; <i8**> [#uses=1]
+  %tmp.i.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8
+  %1 = bitcast i8* %tmp.i.i.i to i8**
   store i8* null, i8** %1, align 8
-  %tmp22.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8 ; <%"struct.llvm::StringMapEntryBase"*> [#uses=1]
-  %tmp24.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp22.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*) ; <i1> [#uses=1]
+  %tmp22.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
+  %tmp24.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp22.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*)
   br i1 %tmp24.i, label %bb9.i, label %_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueIS1_EERNS_14StringMapEntryIS1_EENS_9StringRefET_.exit
 
 bb6.i:                                            ; preds = %entry
-  %tmp16.i = bitcast %"struct.llvm::StringMapEntryBase"* %tmp8.i to %"struct.llvm::StringMapEntry<void*>"* ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=1]
+  %tmp16.i = bitcast %"struct.llvm::StringMapEntryBase"* %tmp8.i to %"struct.llvm::StringMapEntry<void*>"*
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp16.i
 
 bb9.i:                                            ; preds = %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
-  %tmp25.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 3 ; <i32*> [#uses=2]
-  %tmp26.i = load i32* %tmp25.i, align 8          ; <i32> [#uses=1]
-  %tmp27.i = add i32 %tmp26.i, -1                 ; <i32> [#uses=1]
+  %tmp25.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 3
+  %tmp26.i = load i32* %tmp25.i, align 8
+  %tmp27.i = add i32 %tmp26.i, -1
   store i32 %tmp27.i, i32* %tmp25.i, align 8
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
 
 _ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueIS1_EERNS_14StringMapEntryIS1_EENS_9StringRefET_.exit: ; preds = %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 28b1fc7..2f0d2eb 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -135,7 +135,7 @@ define i8* @coerce_mustalias7(i64 %V, i64* %P) {
 define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
 entry:
   %conv = bitcast i16* %A to i8* 
-  tail call void @llvm.memset.i64(i8* %conv, i8 1, i64 200, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
   %arrayidx = getelementptr inbounds i16* %A, i64 42
   %tmp2 = load i16* %arrayidx
   ret i16 %tmp2
@@ -148,7 +148,7 @@ entry:
 define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memset.i64(i8* %conv, i8 %Val, i64 400, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
   %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -168,11 +168,11 @@ define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
   %P3 = bitcast i16* %P to i8*
   br i1 %cond, label %T, label %F
 T:
-  tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)
   br label %Cont
   
 F:
-  tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)
   br label %Cont
 
 Cont:
@@ -193,7 +193,7 @@ Cont:
 define float @memcpy_to_float_local(float* %A) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
   %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -203,11 +203,6 @@ entry:
 }
 
 
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
-
-
 
 ;; non-local i32/float -> i8 load forwarding.
 define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
@@ -539,7 +534,7 @@ define i32 @memset_to_load() nounwind readnone {
 entry:
   %x = alloca [256 x i32], align 4                ; <[256 x i32]*> [#uses=2]
   %tmp = bitcast [256 x i32]* %x to i8*           ; <i8*> [#uses=1]
-  call void @llvm.memset.i64(i8* %tmp, i8 0, i64 1024, i32 4)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
   %arraydecay = getelementptr inbounds [256 x i32]* %x, i32 0, i32 0 ; <i32*>
   %tmp1 = load i32* %arraydecay                   ; <i32> [#uses=1]
   ret i32 %tmp1
@@ -643,3 +638,7 @@ entry:
 ; CHECK-ret i32
 }
 
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Transforms/GlobalOpt/memcpy.ll b/test/Transforms/GlobalOpt/memcpy.ll
index 8f063a2..94e07a0 100644
--- a/test/Transforms/GlobalOpt/memcpy.ll
+++ b/test/Transforms/GlobalOpt/memcpy.ll
@@ -3,13 +3,11 @@
 
 @G1 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00"         ; <[58 x i8]*> [#uses=1]
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
 define void @foo() {
-        %Blah = alloca [58 x i8]                ; <[58 x i8]*> [#uses=1]
-        %tmp.0 = getelementptr [58 x i8]* %Blah, i32 0, i32 0           ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* %tmp.0, i8* getelementptr ([58 x i8]* @G1, i32 0, i32 0), i32 58, i32 1 )
-        ret void
+  %Blah = alloca [58 x i8]
+  %tmp.0 = getelementptr [58 x i8]* %Blah, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp.0, i8* getelementptr inbounds ([58 x i8]* @G1, i32 0, i32 0), i32 58, i32 1, i1 false)
+  ret void
 }
 
-
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/GlobalOpt/memset.ll b/test/Transforms/GlobalOpt/memset.ll
index a9b9d5e..3bb5ce9 100644
--- a/test/Transforms/GlobalOpt/memset.ll
+++ b/test/Transforms/GlobalOpt/memset.ll
@@ -1,21 +1,18 @@
 ; both globals are write only, delete them.
 
-; RUN: opt < %s -globalopt -S | \
-; RUN:   not grep internal
+; RUN: opt < %s -globalopt -S | not grep internal
 
 @G0 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00"         ; <[58 x i8]*> [#uses=1]
 @G1 = internal global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ]          ; <[4 x i32]*> [#uses=1]
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
 define void @foo() {
-        %Blah = alloca [58 x i8]                ; <[58 x i8]*> [#uses=1]
-        %tmp3 = bitcast [58 x i8]* %Blah to i8*         ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i32 1 )
-        call void @llvm.memset.i32( i8* getelementptr ([58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i32 1 )
-        ret void
+  %Blah = alloca [58 x i8]
+  %tmp3 = bitcast [58 x i8]* %Blah to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false)
+  ret void
 }
 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/IPConstantProp/return-constants.ll b/test/Transforms/IPConstantProp/return-constants.ll
index 79220dd..2cd99fe 100644
--- a/test/Transforms/IPConstantProp/return-constants.ll
+++ b/test/Transforms/IPConstantProp/return-constants.ll
@@ -4,38 +4,43 @@
 ;; Check that the second return values didn't get propagated
 ; RUN: cat %t | grep {%N = add i32 %B, %D}
 
-define internal {i32, i32} @foo(i1 %Q) {
-        br i1 %Q, label %T, label %F
+%0 = type { i32, i32 }
 
-T:              ; preds = %0
-        ret i32 21, i32 22
+define internal %0 @foo(i1 %Q) {
+  br i1 %Q, label %T, label %F
 
-F:              ; preds = %0
-        ret i32 21, i32 23
+T:                                                ; preds = %0
+  %mrv = insertvalue %0 undef, i32 21, 0
+  %mrv1 = insertvalue %0 %mrv, i32 22, 1
+  ret %0 %mrv1
+
+F:                                                ; preds = %0
+  %mrv2 = insertvalue %0 undef, i32 21, 0
+  %mrv3 = insertvalue %0 %mrv2, i32 23, 1
+  ret %0 %mrv3
 }
 
-define internal {i32, i32} @bar(i1 %Q) {
-        %A = insertvalue { i32, i32 } undef, i32 21, 0
-        br i1 %Q, label %T, label %F
+define internal %0 @bar(i1 %Q) {
+  %A = insertvalue %0 undef, i32 21, 0
+  br i1 %Q, label %T, label %F
 
-T:              ; preds = %0
-        %B = insertvalue { i32, i32 } %A, i32 22, 1
-        ret { i32, i32 } %B
+T:                                                ; preds = %0
+  %B = insertvalue %0 %A, i32 22, 1
+  ret %0 %B
 
-F:              ; preds = %0
-        %C = insertvalue { i32, i32 } %A, i32 23, 1
-        ret { i32, i32 } %C
+F:                                                ; preds = %0
+  %C = insertvalue %0 %A, i32 23, 1
+  ret %0 %C
 }
 
-define { i32, i32 } @caller(i1 %Q) {
-        %X = call {i32, i32} @foo( i1 %Q )
-        %A = getresult {i32, i32} %X, 0
-        %B = getresult {i32, i32} %X, 1
-        %Y = call {i32, i32} @bar( i1 %Q )
-        %C = extractvalue {i32, i32} %Y, 0
-        %D = extractvalue {i32, i32} %Y, 1
-        %M = add i32 %A, %C
-        %N = add i32 %B, %D
-        ret { i32, i32 } %X
+define %0 @caller(i1 %Q) {
+  %X = call %0 @foo(i1 %Q)
+  %A = extractvalue %0 %X, 0
+  %B = extractvalue %0 %X, 1
+  %Y = call %0 @bar(i1 %Q)
+  %C = extractvalue %0 %Y, 0
+  %D = extractvalue %0 %Y, 1
+  %M = add i32 %A, %C
+  %N = add i32 %B, %D
+  ret %0 %X
 }
-
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 00018ec..3a05c89 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
 ; CHECK-NOT: and
 ; CHECK-NOT: zext
 
diff --git a/test/Transforms/Inline/2008-03-04-StructRet.ll b/test/Transforms/Inline/2008-03-04-StructRet.ll
deleted file mode 100644
index 3311d56..0000000
--- a/test/Transforms/Inline/2008-03-04-StructRet.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -inline -disable-output
-	%struct.Benchmark = type { i32 (...)** }
-	%struct.Complex = type { double, double }
-	%struct.ComplexBenchmark = type { %struct.Benchmark }
-
-define %struct.Complex @_Zml7ComplexS_1(double %a.0, double %a.1, double %b.0, double %b.1) nounwind  {
-entry:
-	%mrv = alloca %struct.Complex		; <%struct.Complex*> [#uses=2]
-	%mrv.gep = getelementptr %struct.Complex* %mrv, i32 0, i32 0		; <double*> [#uses=1]
-	%mrv.ld = load double* %mrv.gep		; <double> [#uses=1]
-	%mrv.gep1 = getelementptr %struct.Complex* %mrv, i32 0, i32 1		; <double*> [#uses=1]
-	%mrv.ld2 = load double* %mrv.gep1		; <double> [#uses=1]
-	ret double %mrv.ld, double %mrv.ld2
-}
-
-define void @_ZNK16ComplexBenchmark9oop_styleEv(%struct.ComplexBenchmark* %this) nounwind  {
-entry:
-	%tmp = alloca %struct.Complex		; <%struct.Complex*> [#uses=0]
-	br label %bb31
-bb:		; preds = %bb31
-	call %struct.Complex @_Zml7ComplexS_1( double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 ) nounwind 		; <%struct.Complex>:0 [#uses=1]
-	%gr = getresult %struct.Complex %0, 1		; <double> [#uses=0]
-	br label %bb31
-bb31:		; preds = %bb, %entry
-	br i1 false, label %bb, label %return
-return:		; preds = %bb31
-	ret void
-}
diff --git a/test/Transforms/Inline/2008-03-07-Inline-2.ll b/test/Transforms/Inline/2008-03-07-Inline-2.ll
deleted file mode 100644
index 0c968e6..0000000
--- a/test/Transforms/Inline/2008-03-07-Inline-2.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: opt < %s -inline -disable-output
-	%struct.Demand = type { double, double }
-	%struct.branch = type { %struct.Demand, double, double, double, double, %struct.branch*, [12 x %struct.leaf*] }
-	%struct.leaf = type { %struct.Demand, double, double }
-@P = external global double		; <double*> [#uses=1]
-
-define %struct.leaf* @build_leaf() nounwind  {
-entry:
-	unreachable
-}
-
-define %struct.Demand @Compute_Branch2(%struct.branch* %br, double %theta_R, double %theta_I, double %pi_R, double %pi_I) nounwind  {
-entry:
-	%mrv = alloca %struct.Demand		; <%struct.Demand*> [#uses=4]
-	%a2 = alloca %struct.Demand		; <%struct.Demand*> [#uses=0]
-	br i1 false, label %bb46, label %bb
-bb:		; preds = %entry
-	%mrv.gep = getelementptr %struct.Demand* %mrv, i32 0, i32 0		; <double*> [#uses=1]
-	%mrv.ld = load double* %mrv.gep		; <double> [#uses=1]
-	%mrv.gep1 = getelementptr %struct.Demand* %mrv, i32 0, i32 1		; <double*> [#uses=1]
-	%mrv.ld2 = load double* %mrv.gep1		; <double> [#uses=1]
-	ret double %mrv.ld, double %mrv.ld2
-bb46:		; preds = %entry
-	br label %bb72
-bb49:		; preds = %bb72
-	call %struct.Demand @Compute_Leaf1( %struct.leaf* null, double 0.000000e+00, double 0.000000e+00 ) nounwind 		; <%struct.Demand>:0 [#uses=1]
-	%gr = getresult %struct.Demand %0, 1		; <double> [#uses=0]
-	br label %bb72
-bb72:		; preds = %bb49, %bb46
-	br i1 false, label %bb49, label %bb77
-bb77:		; preds = %bb72
-	%mrv.gep3 = getelementptr %struct.Demand* %mrv, i32 0, i32 0		; <double*> [#uses=1]
-	%mrv.ld4 = load double* %mrv.gep3		; <double> [#uses=1]
-	%mrv.gep5 = getelementptr %struct.Demand* %mrv, i32 0, i32 1		; <double*> [#uses=1]
-	%mrv.ld6 = load double* %mrv.gep5		; <double> [#uses=1]
-	ret double %mrv.ld4, double %mrv.ld6
-}
-
-define %struct.Demand @Compute_Leaf1(%struct.leaf* %l, double %pi_R, double %pi_I) nounwind  {
-entry:
-	%mrv = alloca %struct.Demand		; <%struct.Demand*> [#uses=2]
-	%tmp10 = load double* @P, align 8		; <double> [#uses=1]
-	%tmp11 = fcmp olt double %tmp10, 0.000000e+00		; <i1> [#uses=1]
-	br i1 %tmp11, label %bb, label %bb13
-bb:		; preds = %entry
-	br label %bb13
-bb13:		; preds = %bb, %entry
-	%mrv.gep = getelementptr %struct.Demand* %mrv, i32 0, i32 0		; <double*> [#uses=1]
-	%mrv.ld = load double* %mrv.gep		; <double> [#uses=1]
-	%mrv.gep1 = getelementptr %struct.Demand* %mrv, i32 0, i32 1		; <double*> [#uses=1]
-	%mrv.ld2 = load double* %mrv.gep1		; <double> [#uses=1]
-	ret double %mrv.ld, double %mrv.ld2
-}
diff --git a/test/Transforms/Inline/2008-03-07-Inline.ll b/test/Transforms/Inline/2008-03-07-Inline.ll
deleted file mode 100644
index 86afb2d..0000000
--- a/test/Transforms/Inline/2008-03-07-Inline.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt < %s -inline -disable-output
-	%struct.Demand = type { double, double }
-	%struct.branch = type { %struct.Demand, double, double, double, double, %struct.branch*, [12 x %struct.leaf*] }
-	%struct.leaf = type { %struct.Demand, double, double }
-@P = external global double		; <double*> [#uses=1]
-
-define %struct.leaf* @build_leaf() nounwind  {
-entry:
-	unreachable
-}
-
-define %struct.Demand @Compute_Branch2(%struct.branch* %br, double %theta_R, double %theta_I, double %pi_R, double %pi_I) nounwind  {
-entry:
-	%mrv = alloca %struct.Demand		; <%struct.Demand*> [#uses=4]
-	%a2 = alloca %struct.Demand		; <%struct.Demand*> [#uses=0]
-	br i1 false, label %bb46, label %bb
-bb:		; preds = %entry
-	%mrv.gep = getelementptr %struct.Demand* %mrv, i32 0, i32 0		; <double*> [#uses=1]
-	%mrv.ld = load double* %mrv.gep		; <double> [#uses=1]
-	%mrv.gep1 = getelementptr %struct.Demand* %mrv, i32 0, i32 1		; <double*> [#uses=1]
-	%mrv.ld2 = load double* %mrv.gep1		; <double> [#uses=1]
-	ret double %mrv.ld, double %mrv.ld2
-bb46:		; preds = %entry
-	br label %bb72
-bb49:		; preds = %bb72
-	call %struct.Demand @Compute_Leaf1( %struct.leaf* null, double 0.000000e+00, double 0.000000e+00 ) nounwind 		; <%struct.Demand>:0 [#uses=1]
-	%gr = getresult %struct.Demand %0, 1		; <double> [#uses=0]
-	br label %bb72
-bb72:		; preds = %bb49, %bb46
-	br i1 false, label %bb49, label %bb77
-bb77:		; preds = %bb72
-	%mrv.gep3 = getelementptr %struct.Demand* %mrv, i32 0, i32 0		; <double*> [#uses=1]
-	%mrv.ld4 = load double* %mrv.gep3		; <double> [#uses=1]
-	%mrv.gep5 = getelementptr %struct.Demand* %mrv, i32 0, i32 1		; <double*> [#uses=1]
-	%mrv.ld6 = load double* %mrv.gep5		; <double> [#uses=1]
-	ret double %mrv.ld4, double %mrv.ld6
-}
-
-define %struct.Demand @Compute_Leaf1(%struct.leaf* %l, double %pi_R, double %pi_I) nounwind  {
-entry:
-	%mrv = alloca %struct.Demand		; <%struct.Demand*> [#uses=4]
-	%tmp10 = load double* @P, align 8		; <double> [#uses=1]
-	%tmp11 = fcmp olt double %tmp10, 0.000000e+00		; <i1> [#uses=1]
-	br i1 %tmp11, label %bb, label %bb13
-bb:		; preds = %entry
-	%mrv.gep = getelementptr %struct.Demand* %mrv, i32 0, i32 0		; <double*> [#uses=1]
-	%mrv.ld = load double* %mrv.gep		; <double> [#uses=1]
-	%mrv.gep1 = getelementptr %struct.Demand* %mrv, i32 0, i32 1		; <double*> [#uses=1]
-	%mrv.ld2 = load double* %mrv.gep1		; <double> [#uses=1]
-	ret double %mrv.ld, double %mrv.ld2
-bb13:		; preds = %entry
-	%mrv.gep3 = getelementptr %struct.Demand* %mrv, i32 0, i32 0		; <double*> [#uses=1]
-	%mrv.ld4 = load double* %mrv.gep3		; <double> [#uses=1]
-	%mrv.gep5 = getelementptr %struct.Demand* %mrv, i32 0, i32 1		; <double*> [#uses=1]
-	%mrv.ld6 = load double* %mrv.gep5		; <double> [#uses=1]
-	ret double %mrv.ld4, double %mrv.ld6
-}
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 961f678..462c29a 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -4,7 +4,7 @@
 define internal void @foo(i32* %p, i32* %q) {
 	%pp = bitcast i32* %p to i8*
 	%qq = bitcast i32* %q to i8*
-	tail call void @llvm.memcpy.i32(i8* %pp, i8* %qq, i32 4, i32 1)
+	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %pp, i8* %qq, i32 4, i32 1, i1 false)
 	ret void
 }
 
@@ -24,12 +24,14 @@ invcont:
 
 lpad:
 	%eh_ptr = call i8* @llvm.eh.exception()
-	%eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
+	%eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
 	unreachable
 }
 
-declare i8* @llvm.eh.exception() nounwind
+declare i8* @llvm.eh.exception() nounwind readonly
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
 
 declare i32 @__gxx_personality_v0(...)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/2006-11-03-Memmove64.ll b/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
deleted file mode 100644
index 35bb45e..0000000
--- a/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:    not grep memmove.i32
-; Instcombine was trying to turn this into a memmove.i32
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-@str10 = internal constant [1 x i8] zeroinitializer             ; <[1 x i8]*> [#uses=1]
-
-define void @do_join(i8* %b) {
-entry:
-        call void @llvm.memmove.i64( i8* %b, i8* getelementptr ([1 x i8]* @str10, i32 0, i64 0), i64 1, i32 1 )
-        ret void
-}
-
-declare void @llvm.memmove.i64(i8*, i8*, i64, i32)
-
diff --git a/test/Transforms/InstCombine/2007-05-04-Crash.ll b/test/Transforms/InstCombine/2007-05-04-Crash.ll
deleted file mode 100644
index 9f50d8a..0000000
--- a/test/Transforms/InstCombine/2007-05-04-Crash.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -instcombine -disable-output
-; PR1384
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-apple-darwin8"
-	%struct.CFRuntimeBase = type { i32, [4 x i8] }
-	%struct.CGColor = type opaque
-	%struct.CGColorSpace = type { %struct.CFRuntimeBase, i8, i8, i8, i32, i32, i32, %struct.CGColor*, float*, %struct.CGMD5Signature, %struct.CGMD5Signature*, [0 x %struct.CGColorSpaceDescriptor] }
-	%struct.CGColorSpaceCalibratedRGBData = type { [3 x float], [3 x float], [3 x float], [9 x float] }
-	%struct.CGColorSpaceDescriptor = type { %struct.CGColorSpaceCalibratedRGBData }
-	%struct.CGColorSpaceLabData = type { [3 x float], [3 x float], [4 x float] }
-	%struct.CGMD5Signature = type { [16 x i8], i8 }
-
-declare fastcc %struct.CGColorSpace* @CGColorSpaceCreate(i32, i32)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define %struct.CGColorSpace* @CGColorSpaceCreateLab(float* %whitePoint, float* %blackPoint, float* %range) {
-entry:
-	%tmp17 = call fastcc %struct.CGColorSpace* @CGColorSpaceCreate( i32 5, i32 3 )		; <%struct.CGColorSpace*> [#uses=2]
-	%tmp28 = getelementptr %struct.CGColorSpace* %tmp17, i32 0, i32 11		; <[0 x %struct.CGColorSpaceDescriptor]*> [#uses=1]
-	%tmp29 = getelementptr [0 x %struct.CGColorSpaceDescriptor]* %tmp28, i32 0, i32 0		; <%struct.CGColorSpaceDescriptor*> [#uses=1]
-	%tmp30 = getelementptr %struct.CGColorSpaceDescriptor* %tmp29, i32 0, i32 0		; <%struct.CGColorSpaceCalibratedRGBData*> [#uses=1]
-	%tmp3031 = bitcast %struct.CGColorSpaceCalibratedRGBData* %tmp30 to %struct.CGColorSpaceLabData*		; <%struct.CGColorSpaceLabData*> [#uses=1]
-	%tmp45 = getelementptr %struct.CGColorSpaceLabData* %tmp3031, i32 0, i32 2		; <[4 x float]*> [#uses=1]
-	%tmp46 = getelementptr [4 x float]* %tmp45, i32 0, i32 0		; <float*> [#uses=1]
-	%tmp4648 = bitcast float* %tmp46 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp4648, i8* null, i32 16, i32 4 )
-	ret %struct.CGColorSpace* %tmp17
-}
diff --git a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
index 1c24df3..15988b6 100644
--- a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
+++ b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
@@ -3,7 +3,7 @@
 
 define void @blah(i16* %tmp10) {
 entry:
-	call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend_stret to void (i16* sret )*)( i16*  sret %tmp10  )
+	call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend_stret to void (i16*)*)( i16*  sret %tmp10  )
 	ret void
 }
 
diff --git a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
index d8f3d97..6190aa9 100644
--- a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
+++ b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
@@ -15,7 +15,7 @@ entry:
 	%tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 %n, i32* %tmp3, align 8
 	%FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8*		; <i8*> [#uses=1]
-	%tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest* nest , i32)* @f to i8*), i8* %FRAME.06 )		; <i8*> [#uses=1]
+	%tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest* , i32)* @f to i8*), i8* %FRAME.06 )		; <i8*> [#uses=1]
 	%tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1		; <i32 (i32)**> [#uses=1]
 	%tmp89 = bitcast i8* %tramp to i32 (i32)*		; <i32 (i32)*> [#uses=2]
 	store i32 (i32)* %tmp89, i32 (i32)** %tmp7, align 8
diff --git a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index 710aff2..fe935f9 100644
--- a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -6,16 +6,15 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 define void @foo(i8* %P) {
 entry:
-	%P_addr = alloca i8*		; <i8**> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i8* %P, i8** %P_addr
-	%tmp = load i8** %P_addr, align 4		; <i8*> [#uses=1]
-	%tmp1 = getelementptr [4 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp, i8* %tmp1, i32 4, i32 1 )
-	br label %return
+  %P_addr = alloca i8*
+  store i8* %P, i8** %P_addr
+  %tmp = load i8** %P_addr, align 4
+  %tmp1 = getelementptr [4 x i8]* @.str, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i32 1, i1 false)
+  br label %return
 
-return:		; preds = %entry
-	ret void
+return:                                           ; preds = %entry
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll b/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
index 2109d34..e3192a9 100644
--- a/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
+++ b/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
@@ -5,7 +5,7 @@
 
 define i32 @main(i32 %argc, i8** %argv) {
 entry:
-	%tmp32 = tail call i32 (i8* noalias , ...) * bitcast (i32 (i8*, ...) nounwind * @printf to i32 (i8* noalias , ...) nounwind *)( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0)  , i32 0 ) nounwind 		; <i32> [#uses=0]
+	%tmp32 = tail call i32 (i8*  , ...) * bitcast (i32 (i8*, ...)  * @printf to i32 (i8*  , ...)  *)( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0)  , i32 0 ) nounwind 		; <i32> [#uses=0]
 	ret i32 undef
 }
 
diff --git a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index b29d8d2..a51c47d 100644
--- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -4,275 +4,276 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
-	%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" = type { i32* }
-	%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
-	%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
-	%"struct.std::bidirectional_iterator_tag" = type <{ i8 }>
-	%"struct.std::forward_iterator_tag" = type <{ i8 }>
-	%"struct.std::input_iterator_tag" = type <{ i8 }>
-	%"struct.std::random_access_iterator_tag" = type <{ i8 }>
-	%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
+
+%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" = type { i32* }
+%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
+%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"struct.std::bidirectional_iterator_tag" = type <{ i8 }>
+%"struct.std::forward_iterator_tag" = type <{ i8 }>
+%"struct.std::input_iterator_tag" = type <{ i8 }>
+%"struct.std::random_access_iterator_tag" = type <{ i8 }>
+%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
 
 define i32* @_Z3fooRSt6vectorIiSaIiEE(%"struct.std::vector<int,std::allocator<int> >"* %X) {
 entry:
-	%0 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
-	%__first_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=31]
-	%__last_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=4]
-	%unnamed_arg.i = alloca %"struct.std::bidirectional_iterator_tag", align 8		; <%"struct.std::bidirectional_iterator_tag"*> [#uses=1]
-	%1 = alloca %"struct.std::bidirectional_iterator_tag"		; <%"struct.std::bidirectional_iterator_tag"*> [#uses=1]
-	%__first_addr.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
-	%2 = alloca %"struct.std::bidirectional_iterator_tag"		; <%"struct.std::bidirectional_iterator_tag"*> [#uses=2]
-	%3 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
-	%4 = alloca i32		; <i32*> [#uses=8]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i32 42, i32* %4, align 4
-	%5 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0		; <%"struct.std::_Vector_base<int,std::allocator<int> >"*> [#uses=1]
-	%6 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %5, i32 0, i32 0		; <%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"*> [#uses=1]
-	%7 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %6, i32 0, i32 1		; <i32**> [#uses=1]
-	%8 = load i32** %7, align 4		; <i32*> [#uses=1]
-	%9 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %8, i32** %9, align 4
-	%10 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0		; <i32**> [#uses=1]
-	%11 = load i32** %10, align 4		; <i32*> [#uses=1]
-	%tmp2.i = ptrtoint i32* %11 to i32		; <i32> [#uses=1]
-	%tmp1.i = inttoptr i32 %tmp2.i to i32*		; <i32*> [#uses=1]
-	%tmp3 = ptrtoint i32* %tmp1.i to i32		; <i32> [#uses=1]
-	%tmp2 = inttoptr i32 %tmp3 to i32*		; <i32*> [#uses=1]
-	%12 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0		; <%"struct.std::_Vector_base<int,std::allocator<int> >"*> [#uses=1]
-	%13 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %12, i32 0, i32 0		; <%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"*> [#uses=1]
-	%14 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %13, i32 0, i32 0		; <i32**> [#uses=1]
-	%15 = load i32** %14, align 4		; <i32*> [#uses=1]
-	%16 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %15, i32** %16, align 4
-	%17 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0		; <i32**> [#uses=1]
-	%18 = load i32** %17, align 4		; <i32*> [#uses=1]
-	%tmp2.i17 = ptrtoint i32* %18 to i32		; <i32> [#uses=1]
-	%tmp1.i18 = inttoptr i32 %tmp2.i17 to i32*		; <i32*> [#uses=1]
-	%tmp8 = ptrtoint i32* %tmp1.i18 to i32		; <i32> [#uses=1]
-	%tmp6 = inttoptr i32 %tmp8 to i32*		; <i32*> [#uses=1]
-	%19 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %tmp6, i32** %19
-	%20 = getelementptr %"struct.std::bidirectional_iterator_tag"* %1, i32 0, i32 0		; <i8*> [#uses=1]
-	%21 = load i8* %20, align 1		; <i8> [#uses=1]
-	%22 = or i8 %21, 0		; <i8> [#uses=1]
-	%23 = or i8 %22, 0		; <i8> [#uses=1]
-	%24 = or i8 %23, 0		; <i8> [#uses=0]
-	%25 = getelementptr %"struct.std::bidirectional_iterator_tag"* %2, i32 0, i32 0		; <i8*> [#uses=1]
-	store i8 0, i8* %25, align 1
-	%elt.i = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%val.i = load i32** %elt.i		; <i32*> [#uses=1]
-	%tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*		; <i8*> [#uses=1]
-	%tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1)
-	%26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %val.i, i32** %26
-	%27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %tmp2, i32** %27
-	%28 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%29 = load i32** %28, align 4		; <i32*> [#uses=1]
-	%30 = ptrtoint i32* %29 to i32		; <i32> [#uses=1]
-	%31 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%32 = load i32** %31, align 4		; <i32*> [#uses=1]
-	%33 = ptrtoint i32* %32 to i32		; <i32> [#uses=1]
-	%34 = sub i32 %30, %33		; <i32> [#uses=1]
-	%35 = ashr i32 %34, 2		; <i32> [#uses=1]
-	%36 = ashr i32 %35, 2		; <i32> [#uses=1]
-	br label %bb12.i.i
+  %0 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %__first_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %__last_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %unnamed_arg.i = alloca %"struct.std::bidirectional_iterator_tag", align 8
+  %1 = alloca %"struct.std::bidirectional_iterator_tag"
+  %__first_addr.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %2 = alloca %"struct.std::bidirectional_iterator_tag"
+  %3 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %4 = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  store i32 42, i32* %4, align 4
+  %5 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
+  %6 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %5, i32 0, i32 0
+  %7 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %6, i32 0, i32 1
+  %8 = load i32** %7, align 4
+  %9 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
+  store i32* %8, i32** %9, align 4
+  %10 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
+  %11 = load i32** %10, align 4
+  %tmp2.i = ptrtoint i32* %11 to i32
+  %tmp1.i = inttoptr i32 %tmp2.i to i32*
+  %tmp3 = ptrtoint i32* %tmp1.i to i32
+  %tmp2 = inttoptr i32 %tmp3 to i32*
+  %12 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
+  %13 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %12, i32 0, i32 0
+  %14 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %13, i32 0, i32 0
+  %15 = load i32** %14, align 4
+  %16 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
+  store i32* %15, i32** %16, align 4
+  %17 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
+  %18 = load i32** %17, align 4
+  %tmp2.i17 = ptrtoint i32* %18 to i32
+  %tmp1.i18 = inttoptr i32 %tmp2.i17 to i32*
+  %tmp8 = ptrtoint i32* %tmp1.i18 to i32
+  %tmp6 = inttoptr i32 %tmp8 to i32*
+  %19 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
+  store i32* %tmp6, i32** %19
+  %20 = getelementptr %"struct.std::bidirectional_iterator_tag"* %1, i32 0, i32 0
+  %21 = load i8* %20, align 1
+  %22 = or i8 %21, 0
+  %23 = or i8 %22, 0
+  %24 = or i8 %23, 0
+  %25 = getelementptr %"struct.std::bidirectional_iterator_tag"* %2, i32 0, i32 0
+  store i8 0, i8* %25, align 1
+  %elt.i = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
+  %val.i = load i32** %elt.i
+  %tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*
+  %tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1, i1 false)
+  %26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %val.i, i32** %26
+  %27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+  store i32* %tmp2, i32** %27
+  %28 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+  %29 = load i32** %28, align 4
+  %30 = ptrtoint i32* %29 to i32
+  %31 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %32 = load i32** %31, align 4
+  %33 = ptrtoint i32* %32 to i32
+  %34 = sub i32 %30, %33
+  %35 = ashr i32 %34, 2
+  %36 = ashr i32 %35, 2
+  br label %bb12.i.i
 
-bb.i.i:		; preds = %bb12.i.i
-	%37 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%38 = load i32** %37, align 4		; <i32*> [#uses=1]
-	%39 = load i32* %38, align 4		; <i32> [#uses=1]
-	%40 = load i32* %4, align 4		; <i32> [#uses=1]
-	%41 = icmp eq i32 %39, %40		; <i1> [#uses=1]
-	%42 = zext i1 %41 to i8		; <i8> [#uses=1]
-	%toBool.i.i = icmp ne i8 %42, 0		; <i1> [#uses=1]
-	br i1 %toBool.i.i, label %bb1.i.i, label %bb2.i.i
+bb.i.i:                                           ; preds = %bb12.i.i
+  %37 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %38 = load i32** %37, align 4
+  %39 = load i32* %38, align 4
+  %40 = load i32* %4, align 4
+  %41 = icmp eq i32 %39, %40
+  %42 = zext i1 %41 to i8
+  %toBool.i.i = icmp ne i8 %42, 0
+  br i1 %toBool.i.i, label %bb1.i.i, label %bb2.i.i
 
-bb1.i.i:		; preds = %bb.i.i
-	%43 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%44 = load i32** %43, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb1.i.i:                                          ; preds = %bb.i.i
+  %43 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %44 = load i32** %43, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb2.i.i:		; preds = %bb.i.i
-	%45 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%46 = load i32** %45, align 4		; <i32*> [#uses=1]
-	%47 = getelementptr i32* %46, i64 1		; <i32*> [#uses=1]
-	%48 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %47, i32** %48, align 4
-	%49 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%50 = load i32** %49, align 4		; <i32*> [#uses=1]
-	%51 = load i32* %50, align 4		; <i32> [#uses=1]
-	%52 = load i32* %4, align 4		; <i32> [#uses=1]
-	%53 = icmp eq i32 %51, %52		; <i1> [#uses=1]
-	%54 = zext i1 %53 to i8		; <i8> [#uses=1]
-	%toBool3.i.i = icmp ne i8 %54, 0		; <i1> [#uses=1]
-	br i1 %toBool3.i.i, label %bb4.i.i, label %bb5.i.i
+bb2.i.i:                                          ; preds = %bb.i.i
+  %45 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %46 = load i32** %45, align 4
+  %47 = getelementptr i32* %46, i64 1
+  %48 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %47, i32** %48, align 4
+  %49 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %50 = load i32** %49, align 4
+  %51 = load i32* %50, align 4
+  %52 = load i32* %4, align 4
+  %53 = icmp eq i32 %51, %52
+  %54 = zext i1 %53 to i8
+  %toBool3.i.i = icmp ne i8 %54, 0
+  br i1 %toBool3.i.i, label %bb4.i.i, label %bb5.i.i
 
-bb4.i.i:		; preds = %bb2.i.i
-	%55 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%56 = load i32** %55, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb4.i.i:                                          ; preds = %bb2.i.i
+  %55 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %56 = load i32** %55, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb5.i.i:		; preds = %bb2.i.i
-	%57 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%58 = load i32** %57, align 4		; <i32*> [#uses=1]
-	%59 = getelementptr i32* %58, i64 1		; <i32*> [#uses=1]
-	%60 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %59, i32** %60, align 4
-	%61 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%62 = load i32** %61, align 4		; <i32*> [#uses=1]
-	%63 = load i32* %62, align 4		; <i32> [#uses=1]
-	%64 = load i32* %4, align 4		; <i32> [#uses=1]
-	%65 = icmp eq i32 %63, %64		; <i1> [#uses=1]
-	%66 = zext i1 %65 to i8		; <i8> [#uses=1]
-	%toBool6.i.i = icmp ne i8 %66, 0		; <i1> [#uses=1]
-	br i1 %toBool6.i.i, label %bb7.i.i, label %bb8.i.i
+bb5.i.i:                                          ; preds = %bb2.i.i
+  %57 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %58 = load i32** %57, align 4
+  %59 = getelementptr i32* %58, i64 1
+  %60 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %59, i32** %60, align 4
+  %61 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %62 = load i32** %61, align 4
+  %63 = load i32* %62, align 4
+  %64 = load i32* %4, align 4
+  %65 = icmp eq i32 %63, %64
+  %66 = zext i1 %65 to i8
+  %toBool6.i.i = icmp ne i8 %66, 0
+  br i1 %toBool6.i.i, label %bb7.i.i, label %bb8.i.i
 
-bb7.i.i:		; preds = %bb5.i.i
-	%67 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%68 = load i32** %67, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb7.i.i:                                          ; preds = %bb5.i.i
+  %67 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %68 = load i32** %67, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb8.i.i:		; preds = %bb5.i.i
-	%69 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%70 = load i32** %69, align 4		; <i32*> [#uses=1]
-	%71 = getelementptr i32* %70, i64 1		; <i32*> [#uses=1]
-	%72 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %71, i32** %72, align 4
-	%73 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%74 = load i32** %73, align 4		; <i32*> [#uses=1]
-	%75 = load i32* %74, align 4		; <i32> [#uses=1]
-	%76 = load i32* %4, align 4		; <i32> [#uses=1]
-	%77 = icmp eq i32 %75, %76		; <i1> [#uses=1]
-	%78 = zext i1 %77 to i8		; <i8> [#uses=1]
-	%toBool9.i.i = icmp ne i8 %78, 0		; <i1> [#uses=1]
-	br i1 %toBool9.i.i, label %bb10.i.i, label %bb11.i.i
+bb8.i.i:                                          ; preds = %bb5.i.i
+  %69 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %70 = load i32** %69, align 4
+  %71 = getelementptr i32* %70, i64 1
+  %72 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %71, i32** %72, align 4
+  %73 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %74 = load i32** %73, align 4
+  %75 = load i32* %74, align 4
+  %76 = load i32* %4, align 4
+  %77 = icmp eq i32 %75, %76
+  %78 = zext i1 %77 to i8
+  %toBool9.i.i = icmp ne i8 %78, 0
+  br i1 %toBool9.i.i, label %bb10.i.i, label %bb11.i.i
 
-bb10.i.i:		; preds = %bb8.i.i
-	%79 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%80 = load i32** %79, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb10.i.i:                                         ; preds = %bb8.i.i
+  %79 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %80 = load i32** %79, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb11.i.i:		; preds = %bb8.i.i
-	%81 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%82 = load i32** %81, align 4		; <i32*> [#uses=1]
-	%83 = getelementptr i32* %82, i64 1		; <i32*> [#uses=1]
-	%84 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %83, i32** %84, align 4
-	%85 = sub i32 %__trip_count.0.i.i, 1		; <i32> [#uses=1]
-	br label %bb12.i.i
+bb11.i.i:                                         ; preds = %bb8.i.i
+  %81 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %82 = load i32** %81, align 4
+  %83 = getelementptr i32* %82, i64 1
+  %84 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %83, i32** %84, align 4
+  %85 = sub i32 %__trip_count.0.i.i, 1
+  br label %bb12.i.i
 
-bb12.i.i:		; preds = %bb11.i.i, %entry
-	%__trip_count.0.i.i = phi i32 [ %36, %entry ], [ %85, %bb11.i.i ]		; <i32> [#uses=2]
-	%86 = icmp sgt i32 %__trip_count.0.i.i, 0		; <i1> [#uses=1]
-	br i1 %86, label %bb.i.i, label %bb13.i.i
+bb12.i.i:                                         ; preds = %bb11.i.i, %entry
+  %__trip_count.0.i.i = phi i32 [ %36, %entry ], [ %85, %bb11.i.i ]
+  %86 = icmp sgt i32 %__trip_count.0.i.i, 0
+  br i1 %86, label %bb.i.i, label %bb13.i.i
 
-bb13.i.i:		; preds = %bb12.i.i
-	%87 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%88 = load i32** %87, align 4		; <i32*> [#uses=1]
-	%89 = ptrtoint i32* %88 to i32		; <i32> [#uses=1]
-	%90 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%91 = load i32** %90, align 4		; <i32*> [#uses=1]
-	%92 = ptrtoint i32* %91 to i32		; <i32> [#uses=1]
-	%93 = sub i32 %89, %92		; <i32> [#uses=1]
-	%94 = ashr i32 %93, 2		; <i32> [#uses=1]
-	switch i32 %94, label %bb26.i.i [
-		i32 1, label %bb22.i.i
-		i32 2, label %bb18.i.i
-		i32 3, label %bb14.i.i
-	]
+bb13.i.i:                                         ; preds = %bb12.i.i
+  %87 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+  %88 = load i32** %87, align 4
+  %89 = ptrtoint i32* %88 to i32
+  %90 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %91 = load i32** %90, align 4
+  %92 = ptrtoint i32* %91 to i32
+  %93 = sub i32 %89, %92
+  %94 = ashr i32 %93, 2
+  switch i32 %94, label %bb26.i.i [
+    i32 1, label %bb22.i.i
+    i32 2, label %bb18.i.i
+    i32 3, label %bb14.i.i
+  ]
 
-bb14.i.i:		; preds = %bb13.i.i
-	%95 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%96 = load i32** %95, align 4		; <i32*> [#uses=1]
-	%97 = load i32* %96, align 4		; <i32> [#uses=1]
-	%98 = load i32* %4, align 4		; <i32> [#uses=1]
-	%99 = icmp eq i32 %97, %98		; <i1> [#uses=1]
-	%100 = zext i1 %99 to i8		; <i8> [#uses=1]
-	%toBool15.i.i = icmp ne i8 %100, 0		; <i1> [#uses=1]
-	br i1 %toBool15.i.i, label %bb16.i.i, label %bb17.i.i
+bb14.i.i:                                         ; preds = %bb13.i.i
+  %95 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %96 = load i32** %95, align 4
+  %97 = load i32* %96, align 4
+  %98 = load i32* %4, align 4
+  %99 = icmp eq i32 %97, %98
+  %100 = zext i1 %99 to i8
+  %toBool15.i.i = icmp ne i8 %100, 0
+  br i1 %toBool15.i.i, label %bb16.i.i, label %bb17.i.i
 
-bb16.i.i:		; preds = %bb14.i.i
-	%101 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%102 = load i32** %101, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb16.i.i:                                         ; preds = %bb14.i.i
+  %101 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %102 = load i32** %101, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb17.i.i:		; preds = %bb14.i.i
-	%103 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%104 = load i32** %103, align 4		; <i32*> [#uses=1]
-	%105 = getelementptr i32* %104, i64 1		; <i32*> [#uses=1]
-	%106 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %105, i32** %106, align 4
-	br label %bb18.i.i
+bb17.i.i:                                         ; preds = %bb14.i.i
+  %103 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %104 = load i32** %103, align 4
+  %105 = getelementptr i32* %104, i64 1
+  %106 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %105, i32** %106, align 4
+  br label %bb18.i.i
 
-bb18.i.i:		; preds = %bb17.i.i, %bb13.i.i
-	%107 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%108 = load i32** %107, align 4		; <i32*> [#uses=1]
-	%109 = load i32* %108, align 4		; <i32> [#uses=1]
-	%110 = load i32* %4, align 4		; <i32> [#uses=1]
-	%111 = icmp eq i32 %109, %110		; <i1> [#uses=1]
-	%112 = zext i1 %111 to i8		; <i8> [#uses=1]
-	%toBool19.i.i = icmp ne i8 %112, 0		; <i1> [#uses=1]
-	br i1 %toBool19.i.i, label %bb20.i.i, label %bb21.i.i
+bb18.i.i:                                         ; preds = %bb17.i.i, %bb13.i.i
+  %107 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %108 = load i32** %107, align 4
+  %109 = load i32* %108, align 4
+  %110 = load i32* %4, align 4
+  %111 = icmp eq i32 %109, %110
+  %112 = zext i1 %111 to i8
+  %toBool19.i.i = icmp ne i8 %112, 0
+  br i1 %toBool19.i.i, label %bb20.i.i, label %bb21.i.i
 
-bb20.i.i:		; preds = %bb18.i.i
-	%113 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%114 = load i32** %113, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb20.i.i:                                         ; preds = %bb18.i.i
+  %113 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %114 = load i32** %113, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb21.i.i:		; preds = %bb18.i.i
-	%115 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%116 = load i32** %115, align 4		; <i32*> [#uses=1]
-	%117 = getelementptr i32* %116, i64 1		; <i32*> [#uses=1]
-	%118 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %117, i32** %118, align 4
-	br label %bb22.i.i
+bb21.i.i:                                         ; preds = %bb18.i.i
+  %115 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %116 = load i32** %115, align 4
+  %117 = getelementptr i32* %116, i64 1
+  %118 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %117, i32** %118, align 4
+  br label %bb22.i.i
 
-bb22.i.i:		; preds = %bb21.i.i, %bb13.i.i
-	%119 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%120 = load i32** %119, align 4		; <i32*> [#uses=1]
-	%121 = load i32* %120, align 4		; <i32> [#uses=1]
-	%122 = load i32* %4, align 4		; <i32> [#uses=1]
-	%123 = icmp eq i32 %121, %122		; <i1> [#uses=1]
-	%124 = zext i1 %123 to i8		; <i8> [#uses=1]
-	%toBool23.i.i = icmp ne i8 %124, 0		; <i1> [#uses=1]
-	br i1 %toBool23.i.i, label %bb24.i.i, label %bb25.i.i
+bb22.i.i:                                         ; preds = %bb21.i.i, %bb13.i.i
+  %119 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %120 = load i32** %119, align 4
+  %121 = load i32* %120, align 4
+  %122 = load i32* %4, align 4
+  %123 = icmp eq i32 %121, %122
+  %124 = zext i1 %123 to i8
+  %toBool23.i.i = icmp ne i8 %124, 0
+  br i1 %toBool23.i.i, label %bb24.i.i, label %bb25.i.i
 
-bb24.i.i:		; preds = %bb22.i.i
-	%125 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%126 = load i32** %125, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb24.i.i:                                         ; preds = %bb22.i.i
+  %125 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %126 = load i32** %125, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb25.i.i:		; preds = %bb22.i.i
-	%127 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%128 = load i32** %127, align 4		; <i32*> [#uses=1]
-	%129 = getelementptr i32* %128, i64 1		; <i32*> [#uses=1]
-	%130 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %129, i32** %130, align 4
-	br label %bb26.i.i
+bb25.i.i:                                         ; preds = %bb22.i.i
+  %127 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %128 = load i32** %127, align 4
+  %129 = getelementptr i32* %128, i64 1
+  %130 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %129, i32** %130, align 4
+  br label %bb26.i.i
 
-bb26.i.i:		; preds = %bb25.i.i, %bb13.i.i
-	%131 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%132 = load i32** %131, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb26.i.i:                                         ; preds = %bb25.i.i, %bb13.i.i
+  %131 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+  %132 = load i32** %131, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit:		; preds = %bb26.i.i, %bb24.i.i, %bb20.i.i, %bb16.i.i, %bb10.i.i, %bb7.i.i, %bb4.i.i, %bb1.i.i
-	%.0.0.i.i = phi i32* [ %132, %bb26.i.i ], [ %126, %bb24.i.i ], [ %114, %bb20.i.i ], [ %102, %bb16.i.i ], [ %80, %bb10.i.i ], [ %68, %bb7.i.i ], [ %56, %bb4.i.i ], [ %44, %bb1.i.i ]		; <i32*> [#uses=1]
-	%tmp2.i.i = ptrtoint i32* %.0.0.i.i to i32		; <i32> [#uses=1]
-	%tmp1.i.i = inttoptr i32 %tmp2.i.i to i32*		; <i32*> [#uses=1]
-	%tmp4.i = ptrtoint i32* %tmp1.i.i to i32		; <i32> [#uses=1]
-	%tmp3.i = inttoptr i32 %tmp4.i to i32*		; <i32*> [#uses=1]
-	%tmp8.i = ptrtoint i32* %tmp3.i to i32		; <i32> [#uses=1]
-	%tmp6.i = inttoptr i32 %tmp8.i to i32*		; <i32*> [#uses=1]
-	%tmp12 = ptrtoint i32* %tmp6.i to i32		; <i32> [#uses=1]
-	%tmp10 = inttoptr i32 %tmp12 to i32*		; <i32*> [#uses=1]
-	%tmp16 = ptrtoint i32* %tmp10 to i32		; <i32> [#uses=1]
-	br label %return
+_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit: ; preds = %bb26.i.i, %bb24.i.i, %bb20.i.i, %bb16.i.i, %bb10.i.i, %bb7.i.i, %bb4.i.i, %bb1.i.i
+  %.0.0.i.i = phi i32* [ %132, %bb26.i.i ], [ %126, %bb24.i.i ], [ %114, %bb20.i.i ], [ %102, %bb16.i.i ], [ %80, %bb10.i.i ], [ %68, %bb7.i.i ], [ %56, %bb4.i.i ], [ %44, %bb1.i.i ]
+  %tmp2.i.i = ptrtoint i32* %.0.0.i.i to i32
+  %tmp1.i.i = inttoptr i32 %tmp2.i.i to i32*
+  %tmp4.i = ptrtoint i32* %tmp1.i.i to i32
+  %tmp3.i = inttoptr i32 %tmp4.i to i32*
+  %tmp8.i = ptrtoint i32* %tmp3.i to i32
+  %tmp6.i = inttoptr i32 %tmp8.i to i32*
+  %tmp12 = ptrtoint i32* %tmp6.i to i32
+  %tmp10 = inttoptr i32 %tmp12 to i32*
+  %tmp16 = ptrtoint i32* %tmp10 to i32
+  br label %return
 
-return:		; preds = %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-	%tmp14 = inttoptr i32 %tmp16 to i32*		; <i32*> [#uses=1]
-	ret i32* %tmp14
+return:                                           ; preds = %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+  %tmp14 = inttoptr i32 %tmp16 to i32*
+  ret i32* %tmp14
 }
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll b/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll
deleted file mode 100644
index 2f6034e..0000000
--- a/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; PR9218
-
-%vec2x2 = type { <2 x double>, <2 x double> }
-
-define %vec2x2 @split(double) nounwind alwaysinline {
-; CHECK: @split
-; CHECK: ret %vec2x2 undef
-  %vba = insertelement <2 x double> undef, double %0, i32 2
-  ret <2 x double> %vba, <2 x double> %vba
-}
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index a6b30c0..442ce58 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -50,10 +50,6 @@ entry:
 }
 
 ; PR5284
-declare i64 @llvm.bswap.i64(i64)
-declare i32 @llvm.bswap.i32(i32)
-declare i16 @llvm.bswap.i16(i16)
-
 define i16 @test7(i32 %A) {
   %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind 
   %C = trunc i32 %B to i16
diff --git a/test/Transforms/InstCombine/call2.ll b/test/Transforms/InstCombine/call2.ll
index 3a6bd67..1f45c7a 100644
--- a/test/Transforms/InstCombine/call2.ll
+++ b/test/Transforms/InstCombine/call2.ll
@@ -4,7 +4,6 @@
 define i32 @bar() {
 entry:
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	%tmp = call i32 (...)* bitcast (i32 (i8*)* @f to i32 (...)*)( double 3.000000e+00 )		; <i32> [#uses=0]
 	br label %return
 
@@ -17,7 +16,6 @@ define i32 @f(i8* %p) {
 entry:
 	%p_addr = alloca i8*		; <i8**> [#uses=1]
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i8* %p, i8** %p_addr
 	br label %return
 
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 107f313..0d84ae4 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -213,5 +213,4 @@ define i32 @cttz_simplify1(i32 %x) nounwind readnone ssp {
 ; CHECK-NEXT: ret i32
 }
 
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
 
diff --git a/test/Transforms/InstCombine/memcpy-to-load.ll b/test/Transforms/InstCombine/memcpy-to-load.ll
index ebb8711..04aac98 100644
--- a/test/Transforms/InstCombine/memcpy-to-load.ll
+++ b/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -4,11 +4,10 @@ target triple = "i686-apple-darwin8"
 
 define void @foo(double* %X, double* %Y) {
 entry:
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp2 = bitcast double* %X to i8*		; <i8*> [#uses=1]
-	%tmp13 = bitcast double* %Y to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp2, i8* %tmp13, i32 8, i32 1 )
-	ret void
+  %tmp2 = bitcast double* %X to i8*
+  %tmp13 = bitcast double* %Y to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 1, i1 false)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/memmove.ll b/test/Transforms/InstCombine/memmove.ll
index 1806cfc..4602c12 100644
--- a/test/Transforms/InstCombine/memmove.ll
+++ b/test/Transforms/InstCombine/memmove.ll
@@ -1,24 +1,20 @@
 ; This test makes sure that memmove instructions are properly eliminated.
 ;
-; RUN: opt < %s -instcombine -S | \
-; RUN:    not grep {call void @llvm.memmove}
+; RUN: opt < %s -instcombine -S | not grep {call void @llvm.memmove}
 
 @S = internal constant [33 x i8] c"panic: restorelist inconsistency\00"		; <[33 x i8]*> [#uses=1]
 @h = constant [2 x i8] c"h\00"		; <[2 x i8]*> [#uses=1]
 @hel = constant [4 x i8] c"hel\00"		; <[4 x i8]*> [#uses=1]
 @hello_u = constant [8 x i8] c"hello_u\00"		; <[8 x i8]*> [#uses=1]
 
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
 define void @test1(i8* %A, i8* %B, i32 %N) {
-	call void @llvm.memmove.i32( i8* %A, i8* %B, i32 0, i32 1 )
+	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i32 1, i1 false)
 	ret void
 }
 
 define void @test2(i8* %A, i32 %N) {
         ;; dest can't alias source since we can't write to source!
-	call void @llvm.memmove.i32( i8* %A, i8* getelementptr ([33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1 )
+	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1, i1 false)
 	ret void
 }
 
@@ -28,15 +24,16 @@ define i32 @test3() {
 	%hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0		; <i8*> [#uses=1]
 	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
 	%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=3]
-	call void @llvm.memmove.i32( i8* %target_p, i8* %h_p, i32 2, i32 2 )
-	call void @llvm.memmove.i32( i8* %target_p, i8* %hel_p, i32 4, i32 4 )
-	call void @llvm.memmove.i32( i8* %target_p, i8* %hello_u_p, i32 8, i32 8 )
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
 	ret i32 0
 }
 
 ; PR2370
 define void @test4(i8* %a) {
-        tail call void @llvm.memmove.i32( i8* %a, i8* %a, i32 100, i32 1 )
-        ret void
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
+  ret void
 }
 
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/memset.ll b/test/Transforms/InstCombine/memset.ll
index 8e85694..7f7bc9f 100644
--- a/test/Transforms/InstCombine/memset.ll
+++ b/test/Transforms/InstCombine/memset.ll
@@ -1,15 +1,14 @@
 ; RUN: opt < %s -instcombine -S | not grep {call.*llvm.memset}
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
 define i32 @main() {
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=5]
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 0, i32 1 )
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 1, i32 1 )
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 2, i32 2 )
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 4, i32 4 )
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 8, i32 8 )
-	ret i32 0
+  %target = alloca [1024 x i8]
+  %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 2, i32 2, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 4, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 8, i32 8, i1 false)
+  ret i32 0
 }
 
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 043525b..28ceb68 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -149,8 +149,6 @@ declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
 
 declare noalias i8* @malloc(i32) nounwind
 
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
 define i32 @test7() {
 ; CHECK: @test7
   %alloc = call noalias i8* @malloc(i32 48) nounwind
diff --git a/test/Transforms/InstCombine/stack-overalign.ll b/test/Transforms/InstCombine/stack-overalign.ll
index 88b4114..2fc8414 100644
--- a/test/Transforms/InstCombine/stack-overalign.ll
+++ b/test/Transforms/InstCombine/stack-overalign.ll
@@ -17,13 +17,13 @@
 
 define void @foo() nounwind {
 entry:
-	%src = alloca [1024 x i8], align 1
-	%src1 = getelementptr [1024 x i8]* %src, i32 0, i32 0
-	call void @llvm.memcpy.i32(i8* getelementptr ([1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i32 1)
-	call void @frob(i8* %src1) nounwind
-	ret void
+  %src = alloca [1024 x i8], align 1
+  %src1 = getelementptr [1024 x i8]* %src, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i32 1, i1 false)
+  call void @frob(i8* %src1) nounwind
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
 declare void @frob(i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index aed51a1..2115dd3 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -18,7 +18,7 @@ bb13:		; preds = %bb
 	br label %bb
 
 bb110:		; preds = %bb
-	%mrv_gr124 = getresult %struct.system__secondary_stack__mark_id %tmp120, 1		; <i64> [#uses=0]
+	%mrv_gr124 = extractvalue %struct.system__secondary_stack__mark_id %tmp120, 1		; <i64> [#uses=0]
 	unreachable
 }
 
diff --git a/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
new file mode 100644
index 0000000..40c6629
--- /dev/null
+++ b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
@@ -0,0 +1,182 @@
+; RUN: opt %s -loop-deletion -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%0 = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] }
+%"class.clang::SourceLocation" = type { i32 }
+%"class.clang::driver::Arg" = type { %"class.clang::driver::Option"*, %"class.clang::driver::Arg"*, i32, i8, %0 }
+%"class.clang::driver::Option" = type { i32 (...)**, i32, %"class.clang::SourceLocation", i8*, %"class.clang::driver::OptionGroup"*, %"class.clang::driver::Option"*, i8 }
+%"class.clang::driver::OptionGroup" = type { %"class.clang::driver::Option" }
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8*, %"union.llvm::SmallVectorBase::U" }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" }
+%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" }
+%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase" }
+%"union.llvm::SmallVectorBase::U" = type { x86_fp80 }
+
+define void @_ZNK5clang6driver7ArgList20AddAllArgsTranslatedERN4llvm11SmallVectorIPKcLj16EEENS0_12OptSpecifierES5_b(i1 zeroext %Joined) nounwind align 2 {
+entry:
+  br i1 undef, label %entry.split.us, label %entry.entry.split_crit_edge
+
+entry.entry.split_crit_edge:                      ; preds = %entry
+  br label %entry.split
+
+entry.split.us:                                   ; preds = %entry
+  br label %for.cond.i14.us
+
+for.cond.i14.us:                                  ; preds = %for.inc.i38.us, %entry.split.us
+  br i1 true, label %for.cond.i50.us-lcssa.us, label %if.end.i23.us
+
+for.inc.i38.us:                                   ; preds = %if.end.i23.us
+  br label %for.cond.i14.us
+
+if.end.i23.us:                                    ; preds = %for.cond.i14.us
+  br i1 true, label %for.cond.i50.us-lcssa.us, label %for.inc.i38.us
+
+for.cond.i50.us-lcssa.us:                         ; preds = %if.end.i23.us, %for.cond.i14.us
+  br label %for.cond.i50
+
+entry.split:                                      ; preds = %entry.entry.split_crit_edge
+  br label %for.cond.i14
+
+for.cond.i14:                                     ; preds = %for.inc.i38, %entry.split
+  br i1 undef, label %for.cond.i50.us-lcssa, label %if.end.i23
+
+if.end.i23:                                       ; preds = %for.cond.i14
+  br i1 undef, label %for.cond.i50.us-lcssa, label %for.inc.i38
+
+for.inc.i38:                                      ; preds = %if.end.i23
+  br label %for.cond.i14
+
+for.cond.i50.us-lcssa:                            ; preds = %if.end.i23, %for.cond.i14
+  br label %for.cond.i50
+
+for.cond.i50:                                     ; preds = %for.cond.i50.us-lcssa, %for.cond.i50.us-lcssa.us
+  br label %for.cond
+
+for.cond.loopexit.us-lcssa:                       ; preds = %if.end.i, %for.cond.i
+  br label %for.cond.loopexit
+
+for.cond.loopexit:                                ; preds = %for.cond.loopexit.us-lcssa.us, %for.cond.loopexit.us-lcssa
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.loopexit, %for.cond.i50
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  br i1 %Joined, label %if.then, label %if.else
+
+if.then:                                          ; preds = %for.body
+  br i1 undef, label %cond.false.i.i, label %_ZN4llvm9StringRefC1EPKc.exit
+
+cond.false.i.i:                                   ; preds = %if.then
+  unreachable
+
+_ZN4llvm9StringRefC1EPKc.exit:                    ; preds = %if.then
+  br i1 undef, label %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit, label %cond.false.i.i91
+
+cond.false.i.i91:                                 ; preds = %_ZN4llvm9StringRefC1EPKc.exit
+  unreachable
+
+_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit: ; preds = %_ZN4llvm9StringRefC1EPKc.exit
+  br i1 undef, label %cond.false.i.i.i, label %if.end13.i.i.i.i
+
+if.end13.i.i.i.i:                                 ; preds = %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit
+  br i1 undef, label %land.lhs.true16.i.i.i.i, label %if.end19.i.i.i.i
+
+land.lhs.true16.i.i.i.i:                          ; preds = %if.end13.i.i.i.i
+  br i1 undef, label %cond.false.i.i.i, label %_ZNK4llvm5Twine8isBinaryEv.exit8.i.i.i.i
+
+_ZNK4llvm5Twine8isBinaryEv.exit8.i.i.i.i:         ; preds = %land.lhs.true16.i.i.i.i
+  br i1 undef, label %cond.false.i.i.i, label %if.end19.i.i.i.i
+
+if.end19.i.i.i.i:                                 ; preds = %_ZNK4llvm5Twine8isBinaryEv.exit8.i.i.i.i, %if.end13.i.i.i.i
+  br i1 undef, label %land.lhs.true22.i.i.i.i, label %_ZN4llvmplERKNS_9StringRefEPKc.exit
+
+land.lhs.true22.i.i.i.i:                          ; preds = %if.end19.i.i.i.i
+  br i1 undef, label %cond.false.i.i.i, label %_ZNK4llvm5Twine8isBinaryEv.exit.i.i.i.i
+
+_ZNK4llvm5Twine8isBinaryEv.exit.i.i.i.i:          ; preds = %land.lhs.true22.i.i.i.i
+  br i1 undef, label %cond.false.i.i.i, label %_ZN4llvmplERKNS_9StringRefEPKc.exit
+
+cond.false.i.i.i:                                 ; preds = %_ZNK4llvm5Twine8isBinaryEv.exit.i.i.i.i, %land.lhs.true22.i.i.i.i, %_ZNK4llvm5Twine8isBinaryEv.exit8.i.i.i.i, %land.lhs.true16.i.i.i.i, %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit
+  unreachable
+
+_ZN4llvmplERKNS_9StringRefEPKc.exit:              ; preds = %_ZNK4llvm5Twine8isBinaryEv.exit.i.i.i.i, %if.end19.i.i.i.i
+  br i1 undef, label %Retry.i, label %if.end.i99
+
+Retry.i:                                          ; preds = %if.end.i99, %_ZN4llvmplERKNS_9StringRefEPKc.exit
+  br i1 undef, label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit, label %new.notnull.i
+
+new.notnull.i:                                    ; preds = %Retry.i
+  br label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit
+
+if.end.i99:                                       ; preds = %_ZN4llvmplERKNS_9StringRefEPKc.exit
+  br label %Retry.i
+
+_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit: ; preds = %new.notnull.i, %Retry.i
+  br label %for.cond.i.preheader
+
+if.else:                                          ; preds = %for.body
+  br i1 undef, label %Retry.i108, label %if.end.i113
+
+Retry.i108:                                       ; preds = %if.end.i113, %if.else
+  br i1 undef, label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114, label %new.notnull.i110
+
+new.notnull.i110:                                 ; preds = %Retry.i108
+  br label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114
+
+if.end.i113:                                      ; preds = %if.else
+  br label %Retry.i108
+
+_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114: ; preds = %new.notnull.i110, %Retry.i108
+  br i1 undef, label %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit125, label %cond.false.i.i123
+
+cond.false.i.i123:                                ; preds = %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114
+  unreachable
+
+_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit125: ; preds = %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114
+  br i1 undef, label %Retry.i134, label %if.end.i139
+
+Retry.i134:                                       ; preds = %if.end.i139, %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit125
+  br i1 undef, label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit140, label %new.notnull.i136
+
+new.notnull.i136:                                 ; preds = %Retry.i134
+  br label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit140
+
+if.end.i139:                                      ; preds = %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit125
+  br label %Retry.i134
+
+_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit140: ; preds = %new.notnull.i136, %Retry.i134
+  br label %for.cond.i.preheader
+
+for.cond.i.preheader:                             ; preds = %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit140, %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit
+  br i1 undef, label %for.cond.i.preheader.split.us, label %for.cond.i.preheader.for.cond.i.preheader.split_crit_edge
+
+for.cond.i.preheader.for.cond.i.preheader.split_crit_edge: ; preds = %for.cond.i.preheader
+  br label %for.cond.i.preheader.split
+
+for.cond.i.preheader.split.us:                    ; preds = %for.cond.i.preheader
+  br label %for.cond.i.us
+
+for.cond.i.us:                                    ; preds = %if.end.i.us, %for.cond.i.preheader.split.us
+  br i1 true, label %for.cond.loopexit.us-lcssa.us, label %if.end.i.us
+
+if.end.i.us:                                      ; preds = %for.cond.i.us
+  br i1 true, label %for.cond.loopexit.us-lcssa.us, label %for.cond.i.us
+
+for.cond.loopexit.us-lcssa.us:                    ; preds = %if.end.i.us, %for.cond.i.us
+  %tmp178218.us.lcssa = phi %"class.clang::driver::Arg"** [ undef, %if.end.i.us ], [ undef, %for.cond.i.us ]
+  br label %for.cond.loopexit
+
+for.cond.i.preheader.split:                       ; preds = %for.cond.i.preheader.for.cond.i.preheader.split_crit_edge
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %if.end.i, %for.cond.i.preheader.split
+  br i1 undef, label %for.cond.loopexit.us-lcssa, label %if.end.i
+
+if.end.i:                                         ; preds = %for.cond.i
+  br i1 undef, label %for.cond.loopexit.us-lcssa, label %for.cond.i
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/test/Transforms/LoopRotate/PhiRename-1.ll b/test/Transforms/LoopRotate/PhiRename-1.ll
index 74426a8..9cb55b4 100644
--- a/test/Transforms/LoopRotate/PhiRename-1.ll
+++ b/test/Transforms/LoopRotate/PhiRename-1.ll
@@ -33,7 +33,6 @@ entry:
 	%c = alloca i32, align 4		; <i32*> [#uses=4]
 	%l = alloca %struct.list*, align 4		; <%struct.list**> [#uses=5]
 	%op = alloca %struct.operator*, align 4		; <%struct.operator**> [#uses=3]
-	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %arity, i32* %arity_addr
 	store i32 0, i32* %c
 	%tmp1 = load %struct.list** @operators		; <%struct.list*> [#uses=1]
diff --git a/test/Transforms/LoopRotate/crash.ll b/test/Transforms/LoopRotate/crash.ll
index 16a6868..954b834 100644
--- a/test/Transforms/LoopRotate/crash.ll
+++ b/test/Transforms/LoopRotate/crash.ll
@@ -113,7 +113,7 @@ bb116:		; preds = %bb131, %entry
 	br i1 false, label %bb141, label %bb131
 
 bb131:		; preds = %bb116
-	%mrv_gr125 = getresult %struct.NSRange %tmp123, 1		; <i64> [#uses=0]
+	%mrv_gr125 = extractvalue %struct.NSRange %tmp123, 1		; <i64> [#uses=0]
 	br label %bb116
 
 bb141:		; preds = %bb116
diff --git a/test/Transforms/Mem2Reg/ignore-lifetime.ll b/test/Transforms/Mem2Reg/ignore-lifetime.ll
new file mode 100644
index 0000000..5e4f9bf
--- /dev/null
+++ b/test/Transforms/Mem2Reg/ignore-lifetime.ll
@@ -0,0 +1,26 @@
+; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
+
+define void @test1() {
+; CHECK: test1
+; CHECK-NOT: alloca
+  %A = alloca i32
+  %B = bitcast i32* %A to i8*
+  call void @llvm.lifetime.start(i64 2, i8* %B)
+  store i32 1, i32* %A
+  call void @llvm.lifetime.end(i64 2, i8* %B)
+  ret void
+}
+
+define void @test2() {
+; CHECK: test2
+; CHECK-NOT: alloca
+  %A = alloca {i8, i16}
+  %B = getelementptr {i8, i16}* %A, i32 0, i32 0
+  call void @llvm.lifetime.start(i64 2, i8* %B)
+  store {i8, i16} zeroinitializer, {i8, i16}* %A
+  call void @llvm.lifetime.end(i64 2, i8* %B)
+  ret void
+}
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index 9f1e280..b95ad91 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -4,31 +4,33 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
 
-define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret  %agg.result) nounwind  {
+%0 = type { x86_fp80, x86_fp80 }
+
+define internal fastcc void @initialize(%0* noalias sret %agg.result) nounwind {
 entry:
-	%agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
-	%agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
-	ret void
+  %agg.result.03 = getelementptr %0* %agg.result, i32 0, i32 0
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+  %agg.result.15 = getelementptr %0* %agg.result, i32 0, i32 1
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+  ret void
 }
 
-declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+declare fastcc x86_fp80 @passed_uninitialized(%0*) nounwind
 
-define fastcc void @badly_optimized() nounwind  {
+define fastcc void @badly_optimized() nounwind {
 entry:
-	%z = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 8		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret  %memtmp )
-	%tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*		; <i8*> [#uses=1]
-	%memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
-	%z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8*		; <i8*> [#uses=1]
-	%tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
-	%tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z )		; <x86_fp80> [#uses=0]
-	ret void
+  %z = alloca %0
+  %tmp = alloca %0
+  %memtmp = alloca %0, align 8
+  call fastcc void @initialize(%0* noalias sret %memtmp)
+  %tmp1 = bitcast %0* %tmp to i8*
+  %memtmp2 = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i32 8, i1 false)
+  %z3 = bitcast %0* %z to i8*
+  %tmp4 = bitcast %0* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i32 8, i1 false)
+  %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 418761e..24cf576 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,20 +1,22 @@
 ; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.}
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-	%a = type { i32 }
-	%b = type { float }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+%a = type { i32 }
+%b = type { float }
+
 declare void @g(%a*)
 
 define float @f() {
 entry:
-	%a_var = alloca %a
-	%b_var = alloca %b
-	call void @g(%a *%a_var)
-	%a_i8 = bitcast %a* %a_var to i8*
-	%b_i8 = bitcast %b* %b_var to i8*
-	call void @llvm.memcpy.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4)
-	%tmp1 = getelementptr %b* %b_var, i32 0, i32 0
-	%tmp2 = load float* %tmp1
-	ret float %tmp2
+  %a_var = alloca %a
+  %b_var = alloca %b
+  call void @g(%a* %a_var)
+  %a_i8 = bitcast %a* %a_var to i8*
+  %b_i8 = bitcast %b* %b_var to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4, i1 false)
+  %tmp1 = getelementptr %b* %b_var, i32 0, i32 0
+  %tmp2 = load float* %tmp1
+  ret float %tmp2
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 5c6a94c..12519ef 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -3,17 +3,21 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
+%0 = type { x86_fp80, x86_fp80 }
+%1 = type { i32, i32 }
+
 define void @test1({ x86_fp80, x86_fp80 }* sret  %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind  {
 entry:
-	%tmp2 = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=1]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1		; <x86_fp80> [#uses=1]
-	call void @ccoshl( { x86_fp80, x86_fp80 }* sret  %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind 
-	%tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8*		; <i8*> [#uses=2]
-	%memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 )
-	%agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 )
+  %tmp2 = alloca %0
+  %memtmp = alloca %0, align 16
+  %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1
+  call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind
+  %tmp219 = bitcast %0* %tmp2 to i8*
+  %memtmp20 = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp219, i8* %memtmp20, i32 32, i32 16, i1 false)
+  %agg.result21 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result21, i8* %tmp219, i32 32, i32 16, i1 false)
+  ret void
 
 ; Check that one of the memcpy's are removed.
 ;; FIXME: PR 8643 We should be able to eliminate the last memcpy here.
@@ -23,22 +27,19 @@ entry:
 ; CHECK: call void @llvm.memcpy
 ; CHECK-NOT: llvm.memcpy
 ; CHECK: ret void
-	ret void
 }
 
 declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind 
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
-
 
 ; The intermediate alloca and one of the memcpy's should be eliminated, the
 ; other should be related with a memmove.
 define void @test2(i8* %P, i8* %Q) nounwind  {
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16
-	%R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
-	call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
-	call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
-        ret void
+  %memtmp = alloca %0, align 16
+  %R = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
+  ret void
         
 ; CHECK: @test2
 ; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
@@ -51,12 +52,12 @@ define void @test2(i8* %P, i8* %Q) nounwind  {
 @x = external global { x86_fp80, x86_fp80 }
 
 define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
-	%x.0 = alloca { x86_fp80, x86_fp80 }
-	%x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
-	call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
-	%agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
-	call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
-	ret void
+  %x.0 = alloca %0
+  %x.01 = bitcast %0* %x.0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false)
+  %agg.result2 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
+  ret void
 ; CHECK: @test3
 ; CHECK-NEXT: %agg.result2 = bitcast 
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -66,10 +67,10 @@ define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
 
 ; PR8644
 define void @test4(i8 *%P) {
-  %A = alloca {i32, i32}
-  %a = bitcast {i32, i32}* %A to i8*
+  %A = alloca %1
+  %a = bitcast %1* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
-  call void @test4a(i8* byval align 1 %a) 
+  call void @test4a(i8* byval align 1 %a)
   ret void
 ; CHECK: @test4
 ; CHECK-NEXT: call void @test4a(
@@ -84,7 +85,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 
 declare void @test5a(%struct.S* byval align 16) nounwind ssp
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 ; rdar://8713376 - This memcpy can't be eliminated.
 define i32 @test5(i32 %x) nounwind ssp {
@@ -128,4 +128,5 @@ entry:
 
 declare i32 @g(%struct.p* byval align 8)
 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll
index 8d3fbd2..7f1667a 100644
--- a/test/Transforms/MemCpyOpt/memmove.ll
+++ b/test/Transforms/MemCpyOpt/memmove.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
 
-declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define i8* @test1(i8* nocapture %src) nounwind {
 entry:
@@ -13,8 +13,8 @@ entry:
 
   %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8* null, i32 1) to i64), i64 13) to i32))
   %call3 = bitcast i8* %malloccall to [13 x i8]*
-  %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; <i8*> [#uses=2]
-  tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1)
+  %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i32 1, i1 false)
   ret i8* %call3.sub
 }
 declare noalias i8* @malloc(i32)
@@ -24,8 +24,8 @@ define void @test2(i8* %P) nounwind {
 entry:
 ; CHECK: @test2
 ; CHECK: call void @llvm.memcpy
-  %add.ptr = getelementptr i8* %P, i64 16         ; <i8*> [#uses=1]
-  tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1)
+  %add.ptr = getelementptr i8* %P, i64 16
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false)
   ret void
 }
 
@@ -34,7 +34,7 @@ define void @test3(i8* %P) nounwind {
 entry:
 ; CHECK: @test3
 ; CHECK: call void @llvm.memmove
-  %add.ptr = getelementptr i8* %P, i64 16         ; <i8*> [#uses=1]
-  tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1)
+  %add.ptr = getelementptr i8* %P, i64 16
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false)
   ret void
 }
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index ddfd0fd..8eac7da 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -3,26 +3,28 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  align 8 %z) nounwind  {
+%0 = type { x86_fp80, x86_fp80 }
+
+define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind {
 entry:
-	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
-	%tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
-	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	%tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	%tmp8 = load x86_fp80* %tmp7, align 16		; <x86_fp80> [#uses=1]
-	store x86_fp80 %tmp3, x86_fp80* %real, align 16
-	store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
-	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind 
-	%memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	%agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
-	ret void
+  %iz = alloca %0
+  %memtmp = alloca %0, align 16
+  %tmp1 = getelementptr %0* %z, i32 0, i32 1
+  %tmp2 = load x86_fp80* %tmp1, align 16
+  %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
+  %tmp4 = getelementptr %0* %iz, i32 0, i32 1
+  %real = getelementptr %0* %iz, i32 0, i32 0
+  %tmp7 = getelementptr %0* %z, i32 0, i32 0
+  %tmp8 = load x86_fp80* %tmp7, align 16
+  store x86_fp80 %tmp3, x86_fp80* %real, align 16
+  store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
+  call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
+  %memtmp14 = bitcast %0* %memtmp to i8*
+  %agg.result15 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result15, i8* %memtmp14, i32 32, i32 16, i1 false)
+  ret void
 }
 
-declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind 
+declare void @ccoshl(%0* noalias sret, %0* byval) nounwind
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
index 1b26ca9..a40455c 100644
--- a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
+++ b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
@@ -5,7 +5,7 @@ declare {i32, i32} @bar(i32 %A)
 
 define i32 @foo() {
 	%X = call {i32, i32} @bar(i32 17)
-        %Y = getresult {i32, i32} %X, 0
+        %Y = extractvalue {i32, i32} %X, 0
 	%Z = add i32 %Y, %Y
 	ret i32 %Z
 }
diff --git a/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll b/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
deleted file mode 100644
index e67b610..0000000
--- a/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep {alloca.*client_t}
-; PR1446
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-pc-linux-gnu"
-
-	%struct.clientSnapshot_t = type { i32, [32 x i8], %struct.playerState_t, i32, i32, i32, i32, i32 }
-	%struct.client_t = type { i32, [1024 x i8], [64 x [1024 x i8]], i32, i32, i32, i32, i32, i32, %struct.usercmd_t, i32, i32, [1024 x i8], %struct.sharedEntity_t*, [32 x i8], [64 x i8], i32, i32, i32, i32, i32, i32, [8 x i8*], [8 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, [32 x %struct.clientSnapshot_t], i32, i32, i32, i32, i32, %struct.netchan_t, %struct.netchan_buffer_t*, %struct.netchan_buffer_t**, i32, [1025 x i32] }
-	%struct.entityShared_t = type { %struct.entityState_t, i32, i32, i32, i32, i32, [3 x float], [3 x float], i32, [3 x float], [3 x float], [3 x float], [3 x float], i32 }
-	%struct.entityState_t = type { i32, i32, i32, %struct.trajectory_t, %struct.trajectory_t, i32, i32, [3 x float], [3 x float], [3 x float], [3 x float], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.msg_t = type { i32, i32, i32, i8*, i32, i32, i32, i32 }
-	%struct.netadr_t = type { i32, [4 x i8], [10 x i8], i16 }
-	%struct.netchan_buffer_t = type { %struct.msg_t, [16384 x i8], %struct.netchan_buffer_t* }
-	%struct.netchan_t = type { i32, i32, %struct.netadr_t, i32, i32, i32, i32, i32, [16384 x i8], i32, i32, i32, [16384 x i8] }
-	%struct.playerState_t = type { i32, i32, i32, i32, i32, [3 x float], [3 x float], i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, [3 x float], i32, i32, [2 x i32], [2 x i32], i32, i32, i32, i32, i32, i32, [3 x float], i32, i32, i32, i32, i32, [16 x i32], [16 x i32], [16 x i32], [16 x i32], i32, i32, i32, i32, i32, i32, i32 }
-	%struct.sharedEntity_t = type { %struct.entityState_t, %struct.entityShared_t }
-	%struct.trajectory_t = type { i32, i32, i32, [3 x float], [3 x float] }
-	%struct.usercmd_t = type { i32, [3 x i32], i32, i8, i8, i8, i8 }
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define void @SV_DirectConnect(i64 %from.0.0, i64 %from.0.1, i32 %from.1) {
-entry:
-	%temp = alloca %struct.client_t, align 16		; <%struct.client_t*> [#uses=1]
-	%temp586 = bitcast %struct.client_t* %temp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* null, i8* %temp586, i32 121596, i32 0 )
-	unreachable
-}
diff --git a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
index f1b8b80..cf96c4c 100644
--- a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
@@ -3,21 +3,22 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
-	%struct.LongestMember = type { i8, i32 }
-	%struct.MyString = type { i32 }
-	%struct.UnionType = type { %struct.LongestMember }
+
+%struct.LongestMember = type { i8, i32 }
+%struct.MyString = type { i32 }
+%struct.UnionType = type { %struct.LongestMember }
 
 define void @_Z4testP9UnionTypePS0_(%struct.UnionType* %p, %struct.UnionType** %pointerToUnion) {
 entry:
-	%tmp = alloca %struct.UnionType, align 8		; <%struct.UnionType*> [#uses=2]
-	%tmp2 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp13 = getelementptr %struct.UnionType* %p, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp2, i8* %tmp13, i32 8, i32 0 )
-	%tmp5 = load %struct.UnionType** %pointerToUnion		; <%struct.UnionType*> [#uses=1]
-	%tmp56 = getelementptr %struct.UnionType* %tmp5, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp7 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp56, i8* %tmp7, i32 8, i32 0 )
-	ret void
+  %tmp = alloca %struct.UnionType, align 8
+  %tmp2 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0
+  %tmp13 = getelementptr %struct.UnionType* %p, i32 0, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
+  %tmp5 = load %struct.UnionType** %pointerToUnion
+  %tmp56 = getelementptr %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
+  %tmp7 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
index b704727..71ba601 100644
--- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -4,14 +4,14 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
-define void @memtest1(i8* %dst, i8* %src) nounwind  {
+define void @memtest1(i8* %dst, i8* %src) nounwind {
 entry:
-	%temp = alloca [200 x i8]		; <[100 x i8]*> [#uses=2]
-	%temp1 = bitcast [200 x i8]* %temp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 200, i32 1 )
-	%temp3 = bitcast [200 x i8]* %temp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 200, i32 1 )
-	ret void
+  %temp = alloca [200 x i8]
+  %temp1 = bitcast [200 x i8]* %temp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false)
+  %temp3 = bitcast [200 x i8]* %temp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
index 1df01c1..7cccb19 100644
--- a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@@ -2,21 +2,22 @@
 ; PR2423
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
-	%struct.x = type { [1 x i32], i32, i32 }
+
+%struct.x = type { [1 x i32], i32, i32 }
 
 define i32 @b() nounwind {
 entry:
-	%s = alloca %struct.x		; <%struct.x*> [#uses=2]
-	%r = alloca %struct.x		; <%struct.x*> [#uses=2]
-	call i32 @a( %struct.x* %s ) nounwind		; <i32>:0 [#uses=0]
-	%r1 = bitcast %struct.x* %r to i8*		; <i8*> [#uses=1]
-	%s2 = bitcast %struct.x* %s to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %r1, i8* %s2, i32 12, i32 8 )
-	getelementptr %struct.x* %r, i32 0, i32 0, i32 1		; <i32*>:1 [#uses=1]
-	load i32* %1, align 4		; <i32>:2 [#uses=1]
-	ret i32 %2
+  %s = alloca %struct.x
+  %r = alloca %struct.x
+  %0 = call i32 @a(%struct.x* %s) nounwind
+  %r1 = bitcast %struct.x* %r to i8*
+  %s2 = bitcast %struct.x* %s to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
+  %1 = getelementptr %struct.x* %r, i32 0, i32 0, i32 1
+  %2 = load i32* %1, align 4
+  ret i32 %2
 }
 
 declare i32 @a(%struct.x*)
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
index e32e683..e7a58f1 100644
--- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@@ -18,8 +18,8 @@ entry:
         ; because the type of the first element in %struct.two is i8.
 	%tmpS = getelementptr %struct.two* %S, i32 0, i32 0, i32 0 
 	%tmpD = bitcast %struct.two* %D to i8*
-        call void @llvm.memmove.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false)
         ret void
 }
 
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+\ No newline at end of file
diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
index 526457b..3218d59 100644
--- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -12,9 +12,8 @@ entry:
         %0 = getelementptr %struct.st* %s, i32 0, i32 0  ; <i16*> [#uses=1]
         store i16 1, i16* %0, align 4
         %s1 = bitcast %struct.st* %s to i8*  ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32(i8* %p, i8* %s1, i32 2, i32 1)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false)
         ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
index 50e7f9a..d71bcb9 100644
--- a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
+++ b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
@@ -1,11 +1,11 @@
 ; RUN: opt < %s -scalarrepl -disable-output -stats |& grep "Number of aggregates converted to scalar"
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
-	type { }		; type %0
-	type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 }		; type %1
-	type { i8*, i32 }		; type %2
-	type opaque		; type %3
-	type { i32 }		; type %4
+	%0 = type { }		; type %0
+	%1 = type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 }		; type %1
+	%2 = type { i8*, i32 }		; type %2
+	%3 = type opaque		; type %3
+	%4 = type { i32 }		; type %4
 	%llvm.dbg.anchor.type = type { i32, i32 }
 	%llvm.dbg.basictype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, i32 }
 	%llvm.dbg.compile_unit.type = type { i32, %0*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
diff --git a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
index 31d9bae..1993e4f 100644
--- a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
@@ -13,54 +13,54 @@ define void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
 ; CHECK: @test
 ; CHECK-NOT: alloca
 ; CHECK: "alloca point"
+; CHECK: store <8 x i16>
+; CHECK: store <8 x i16>
+
 entry:
-  %tmp_addr = alloca %struct.int16x8_t            ; <%struct.int16x8_t*> [#uses=3]
-  %dst_addr = alloca %struct.int16x8x2_t*         ; <%struct.int16x8x2_t**> [#uses=2]
-  %__rv = alloca %union..0anon                    ; <%union..0anon*> [#uses=2]
-  %__bx = alloca %struct.int16x8_t                ; <%struct.int16x8_t*> [#uses=2]
-  %__ax = alloca %struct.int16x8_t                ; <%struct.int16x8_t*> [#uses=2]
-  %tmp2 = alloca %struct.int16x8x2_t              ; <%struct.int16x8x2_t*> [#uses=2]
-  %0 = alloca %struct.int16x8x2_t                 ; <%struct.int16x8x2_t*> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+  %tmp_addr = alloca %struct.int16x8_t
+  %dst_addr = alloca %struct.int16x8x2_t*
+  %__rv = alloca %union..0anon
+  %__bx = alloca %struct.int16x8_t
+  %__ax = alloca %struct.int16x8_t
+  %tmp2 = alloca %struct.int16x8x2_t
+  %0 = alloca %struct.int16x8x2_t
+  %"alloca point" = bitcast i32 0 to i32
+  %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
   store <8 x i16> %tmp.0, <8 x i16>* %1
   store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
-  %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %4 = load <8 x i16>* %3, align 16               ; <<8 x i16>> [#uses=1]
+  %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0
+  %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
+  %4 = load <8 x i16>* %3, align 16
   store <8 x i16> %4, <8 x i16>* %2, align 16
-  %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %7 = load <8 x i16>* %6, align 16               ; <<8 x i16>> [#uses=1]
+  %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0
+  %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
+  %7 = load <8 x i16>* %6, align 16
   store <8 x i16> %7, <8 x i16>* %5, align 16
-  %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %9 = load <8 x i16>* %8, align 16               ; <<8 x i16>> [#uses=2]
-  %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %11 = load <8 x i16>* %10, align 16             ; <<8 x i16>> [#uses=2]
-  %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
-  %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2]
-  %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; <<8 x i16>> [#uses=1]
-  %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+  %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0
+  %9 = load <8 x i16>* %8, align 16
+  %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0
+  %11 = load <8 x i16>* %10, align 16
+  %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0
+  %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t*
+  %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0
   store <8 x i16> %14, <8 x i16>* %15
-  %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; <<8 x i16>> [#uses=1]
-  %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1]
+  %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1
   store <8 x i16> %16, <8 x i16>* %17
-  %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
-  %19 = bitcast %struct.int16x8x2_t* %0 to i8*    ; <i8*> [#uses=1]
-  %20 = bitcast %struct.int16x8x2_t* %18 to i8*   ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16)
-  %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
-  %21 = bitcast %struct.int16x8x2_t* %0 to i8*    ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16)
-  %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1]
-  %23 = bitcast %struct.int16x8x2_t* %22 to i8*   ; <i8*> [#uses=1]
-  %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16)
+  %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0
+  %19 = bitcast %struct.int16x8x2_t* %0 to i8*
+  %20 = bitcast %struct.int16x8x2_t* %18 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false)
+  %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
+  %21 = bitcast %struct.int16x8x2_t* %0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
+  %22 = load %struct.int16x8x2_t** %dst_addr, align 4
+  %23 = bitcast %struct.int16x8x2_t* %22 to i8*
+  %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
   br label %return
 
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-
 return:                                           ; preds = %entry
   ret void
 }
@@ -69,21 +69,22 @@ return:                                           ; preds = %entry
 %struct._NSRange = type { i64 }
 
 define void @test_memcpy_self() nounwind {
-; CHECK: @test_memcpy_self
-; CHECK-NOT: alloca
-; CHECK: br i1
 entry:
-  %range = alloca %struct._NSRange                ; <%struct._NSRange*> [#uses=2]
+  %range = alloca %struct._NSRange
   br i1 undef, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %tmp3 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
-  %tmp4 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8)
+  %tmp3 = bitcast %struct._NSRange* %range to i8*
+  %tmp4 = bitcast %struct._NSRange* %range to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false)
   ret void
 
 cond.false:                                       ; preds = %entry
   ret void
+
+; CHECK: @test_memcpy_self
+; CHECK-NOT: alloca
+; CHECK: br i1
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
index 3aee399..52df6d5 100644
--- a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
+++ b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 
-%struct.test = type { [3 x double ] }
+%struct.test = type { [3 x double] }
 
 define void @test_memcpy_self() nounwind {
 ; CHECK: @test_memcpy_self
@@ -11,8 +11,8 @@ define void @test_memcpy_self() nounwind {
 ; CHECK: ret void
   %1 = alloca %struct.test
   %2 = bitcast %struct.test* %1 to i8*
-  call void @llvm.memcpy.i32(i8* %2, i8* %2, i32 24, i32 4)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
index 32e67fb..98fa1c6 100644
--- a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
+++ b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
@@ -10,7 +10,8 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; CHECK: main
 ; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer
+; CHECK: %[[A:[a-z0-9]*]] = and i128
+; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32
 
 define void @main() uwtable ssp {
 entry:
@@ -27,7 +28,8 @@ entry:
 
 ; CHECK: test1
 ; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer
+; CHECK: %[[A:[a-z0-9]*]] = and i128
+; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32
 
 define void @test1() uwtable ssp {
 entry:
diff --git a/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll b/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
new file mode 100644
index 0000000..f8530d6
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+; CHECK: f
+; CHECK-NOT: alloca
+; CHECK: %[[A:[a-z0-9]*]] = and i128 undef, -16777216
+; CHECK: %[[B:[a-z0-9]*]] = bitcast i128 %[[A]] to <4 x float>
+; CHECK: %[[C:[a-z0-9]*]] = extractelement <4 x float> %[[B]], i32 0
+; CHECK: ret float %[[C]]
+
+define float @f() nounwind ssp {
+entry:
+  %a = alloca <4 x float>, align 16
+  %p = bitcast <4 x float>* %a to i8*
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false)
+  %vec = load <4 x float>* %a, align 8
+  %val = extractelement <4 x float> %vec, i32 0
+  ret float %val
+}
+
+; CHECK: g
+; CHECK-NOT: alloca
+; CHECK: and i128
+
+define void @g() nounwind ssp {
+entry:
+  %a = alloca { <4 x float> }, align 16
+  %p = bitcast { <4 x float> }* %a to i8*
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false)
+  %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]*
+  %arrayidx = getelementptr inbounds [2 x <2 x float>]* %q, i32 0, i32 0
+  store <2 x float> undef, <2 x float>* %arrayidx, align 8
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/badarray.ll b/test/Transforms/ScalarRepl/badarray.ll
index 3ec3c01..768fec6 100644
--- a/test/Transforms/ScalarRepl/badarray.ll
+++ b/test/Transforms/ScalarRepl/badarray.ll
@@ -48,10 +48,10 @@ entry:
   %callret = call %padded *@test3f() ; <i32> [#uses=2]
   %callretcast = bitcast %padded* %callret to i8*                     ; <i8*> [#uses=1]
   %var_11 = bitcast %padded* %var_1 to i8*        ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 declare %padded* @test3f()
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
index 83daaaf..cd4dc32 100644
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -143,7 +143,6 @@ entry:
         %struct.anon = type { %struct.aal_spanarray_t }
 
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 
 define fastcc void @test7() {
 entry:
@@ -158,7 +157,7 @@ cond_next114.i:         ; preds = %cond_true
 
 cond_next:              ; preds = %cond_true
         %SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8*            ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* %SB19, i8* null, i32 12, i32 0 )
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i32 0, i1 false)
         br i1 false, label %cond_next34, label %cond_next79
 
 cond_next34:            ; preds = %cond_next
@@ -189,14 +188,14 @@ entry:
 
 
 ; rdar://6808691 - ZeroLengthMemSet
-        type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }>           
+        %0 = type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }>           
 
 define i32 @test9() {
 entry:
         %.compoundliteral = alloca %0           
         %tmp228 = getelementptr %0* %.compoundliteral, i32 0, i32 7
         %tmp229 = bitcast [0 x i16]* %tmp228 to i8*             
-        call void @llvm.memset.i64(i8* %tmp229, i8 0, i64 0, i32 2)
+        call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i32 2, i1 false)
         unreachable
 }
 
@@ -260,3 +259,6 @@ entry:
   call void %0() nounwind
   ret void
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll
index 5aeefcd..42e7a0f 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -14,31 +14,29 @@ entry:
 	%L = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=2]
 	%L2 = bitcast %struct.foo* %L to i8*		; <i8*> [#uses=1]
 	%tmp13 = bitcast %struct.foo* %P to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %L2, i8* %tmp13, i32 8, i32 4 )
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false)
 	%tmp4 = getelementptr %struct.foo* %L, i32 0, i32 0		; <i32*> [#uses=1]
 	%tmp5 = load i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 
 define i32 @test2() {
 entry:
 	%L = alloca [4 x %struct.foo], align 16		; <[4 x %struct.foo]*> [#uses=2]
 	%L12 = bitcast [4 x %struct.foo]* %L to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %L12, i8 0, i32 32, i32 16 )
+        call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false)
 	%tmp4 = getelementptr [4 x %struct.foo]* %L, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
 	%tmp5 = load i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
 }
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
 
 define i32 @test3() {
 entry:
 	%B = alloca %struct.bar, align 16		; <%struct.bar*> [#uses=4]
 	%B1 = bitcast %struct.bar* %B to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %B1, i8 1, i32 24, i32 16 )
+	call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i32 16, i1 false)
 	%tmp3 = getelementptr %struct.bar* %B, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 1, i32* %tmp3
 	%tmp4 = getelementptr %struct.bar* %B, i32 0, i32 2		; <double*> [#uses=1]
@@ -58,9 +56,12 @@ entry:
 	store i32 1, i32* %0, align 8
 	%1 = getelementptr %struct.f* %A, i32 0, i32 1		; <i32*> [#uses=1]
 	%2 = bitcast i32* %1 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32(i8* %2, i8 2, i32 12, i32 4)
+	call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false)
 	%3 = getelementptr %struct.f* %A, i32 0, i32 2		; <i32*> [#uses=1]
 	%4 = load i32* %3, align 8		; <i32> [#uses=1]
 	%retval12 = trunc i32 %4 to i16		; <i16> [#uses=1]
 	ret i16 %retval12
 }
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+\ No newline at end of file
diff --git a/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll b/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll
deleted file mode 100644
index 8e05a3c..0000000
--- a/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: opt < %s -simplifycfg -disable-output
-; rdar://5882392
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin9"
-	%struct.Py_complex = type { double, double }
-
-define %struct.Py_complex @_Py_c_pow(double %a.0, double %a.1, double %b.0, double %b.1) nounwind  {
-entry:
-	%tmp7 = fcmp une double %b.0, 0.000000e+00		; <i1> [#uses=1]
-	%tmp11 = fcmp une double %b.1, 0.000000e+00		; <i1> [#uses=1]
-	%bothcond = or i1 %tmp7, %tmp11		; <i1> [#uses=1]
-	br i1 %bothcond, label %bb15, label %bb53
-
-bb15:		; preds = %entry
-	%tmp18 = fcmp une double %a.0, 0.000000e+00		; <i1> [#uses=1]
-	%tmp24 = fcmp une double %a.1, 0.000000e+00		; <i1> [#uses=1]
-	%bothcond1 = or i1 %tmp18, %tmp24		; <i1> [#uses=1]
-	br i1 %bothcond1, label %bb29, label %bb27
-
-bb27:		; preds = %bb15
-	%tmp28 = call i32* @__error( ) nounwind 		; <i32*> [#uses=1]
-	store i32 33, i32* %tmp28, align 4
-	ret double undef, double undef
-
-bb29:		; preds = %bb15
-	%tmp36 = fcmp une double %b.1, 0.000000e+00		; <i1> [#uses=1]
-	br i1 %tmp36, label %bb39, label %bb47
-
-bb39:		; preds = %bb29
-	br label %bb47
-
-bb47:		; preds = %bb39, %bb29
-	ret double undef, double undef
-
-bb53:		; preds = %entry
-	ret double undef, double undef
-}
-
-declare i32* @__error()
-
-declare double @pow(double, double) nounwind readonly 
-
-declare double @cos(double) nounwind readonly 
diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
deleted file mode 100644
index 9c15efc..0000000
--- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -simplifycfg -disable-output
-; PR2256
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-pc-mingw32"
-
-define { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval  %Z, i1 %cond) nounwind  {
-bb:		; preds = %entry
-	br i1  %cond, label %bb48, label %bb40
-
-bb40:		; preds = %bb
-	store i32 34, i32* null, align 4
-	br label %bb196
-
-bb48:		; preds = %bb.bb48_crit_edge, %entry.bb48_crit_edge
-	%tmp53 = icmp eq i32 0, 1280		; <i1> [#uses=1]
-	br i1 %tmp53, label %bb56, label %bb174
-
-bb56:		; preds = %bb48
-	%iftmp.0.0 = select i1 false, x86_fp80 0xK3FFFC90FDAA22168C235, x86_fp80 0xKBFFFC90FDAA22168C235		; <x86_fp80> [#uses=0]
-	br label %bb196
-
-
-bb174:		; preds = %bb144, %bb114
-	%tmp191 = fmul x86_fp80 0xK00000000000000000000, 0xK3FFE8000000000000000		; <x86_fp80> [#uses=1]
-	br label %bb196
-
-bb196:		; preds = %bb174, %bb56, %bb40
-	%Res.1.0 = phi x86_fp80 [ 0xK7FFF8000000000000000, %bb40 ], [ %tmp191, %bb174 ], [ 0xK00000000000000000000, %bb56 ]		; <x86_fp80> [#uses=1]
-	ret x86_fp80 0xK00000000000000000000, x86_fp80 %Res.1.0
-}
diff --git a/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
new file mode 100644
index 0000000..1b70c06
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -simplifycfg -S | \
+; RUN:   not grep " switch"
+; PR10131
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+define i32 @t(i32 %m) nounwind readnone {
+entry:
+  switch i32 %m, label %sw.bb4 [
+    i32 0, label %sw.bb0
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+
+sw.bb0:                                           ; preds = %entry
+  br label %return
+
+sw.bb1:                                           ; preds = %entry
+  br label %return
+
+sw.bb2:                                           ; preds = %entry
+  br label %return
+
+sw.bb3:                                           ; preds = %entry
+  br label %return
+
+sw.bb4:                                           ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %entry, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1
+  %retval.0 = phi i32 [ 4, %sw.bb4 ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %sw.bb0 ]
+  ret i32 %retval.0
+}
diff --git a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
index 7d7391a..343e169 100644
--- a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
+++ b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -simplifycfg -S | \
-; RUN:   grep switch | count 1
+; RUN:   not grep " switch"
 
 ; ModuleID = '<stdin>'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/SimplifyLibCalls/MemCpy.ll b/test/Transforms/SimplifyLibCalls/MemCpy.ll
index 39662b1..c711178 100644
--- a/test/Transforms/SimplifyLibCalls/MemCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/MemCpy.ll
@@ -4,17 +4,16 @@
 @hel = constant [4 x i8] c"hel\00"		; <[4 x i8]*> [#uses=1]
 @hello_u = constant [8 x i8] c"hello_u\00"		; <[8 x i8]*> [#uses=1]
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
 define i32 @main() {
-	%h_p = getelementptr [2 x i8]* @h, i32 0, i32 0		; <i8*> [#uses=1]
-	%hel_p = getelementptr [4 x i8]* @hel, i32 0, i32 0		; <i8*> [#uses=1]
-	%hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0		; <i8*> [#uses=1]
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=3]
-	call void @llvm.memcpy.i32( i8* %target_p, i8* %h_p, i32 2, i32 2 )
-	call void @llvm.memcpy.i32( i8* %target_p, i8* %hel_p, i32 4, i32 4 )
-	call void @llvm.memcpy.i32( i8* %target_p, i8* %hello_u_p, i32 8, i32 8 )
-	ret i32 0
+  %h_p = getelementptr [2 x i8]* @h, i32 0, i32 0
+  %hel_p = getelementptr [4 x i8]* @hel, i32 0, i32 0
+  %hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0
+  %target = alloca [1024 x i8]
+  %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
+  ret i32 0
 }
 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/TailCallElim/inf-recursion.ll b/test/Transforms/TailCallElim/inf-recursion.ll
index e4ac928..c427869 100644
--- a/test/Transforms/TailCallElim/inf-recursion.ll
+++ b/test/Transforms/TailCallElim/inf-recursion.ll
@@ -30,5 +30,4 @@ define float @fabsf(float %f) {
         ret float %t
 }
 
-declare float @fabsf(float %f)
 declare x86_fp80 @fabsl(x86_fp80 %f)
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index 3509296..ba4cbc5 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -30,14 +30,6 @@ if 'TEMP' in os.environ:
 
 ###
 
-# If necessary, point the dynamic loader at libLLVM.so.
-if config.enable_shared:
-    shlibpath = config.environment.get(config.shlibpath_var,'')
-    if shlibpath:
-        shlibpath = os.pathsep + shlibpath
-    shlibpath = config.shlibdir + shlibpath
-    config.environment[config.shlibpath_var] = shlibpath
-
 # Check that the object root is known.
 if config.test_exec_root is None:
     # Otherwise, we haven't loaded the site specific configuration (the user is
@@ -81,3 +73,11 @@ if config.test_exec_root is None:
     lit.note('using out-of-tree build at %r' % llvm_obj_root)
     lit.load_config(config, site_cfg)
     raise SystemExit
+
+# If necessary, point the dynamic loader at libLLVM.so.
+if config.enable_shared:
+    shlibpath = config.environment.get(config.shlibpath_var,'')
+    if shlibpath:
+        shlibpath = os.pathsep + shlibpath
+    shlibpath = config.shlibdir + shlibpath
+    config.environment[config.shlibpath_var] = shlibpath
diff --git a/test/Verifier/2002-04-13-RetTypes.ll b/test/Verifier/2002-04-13-RetTypes.ll
index 197f5c2..d6e1a54 100644
--- a/test/Verifier/2002-04-13-RetTypes.ll
+++ b/test/Verifier/2002-04-13-RetTypes.ll
@@ -4,7 +4,6 @@
 ; delcared return type of the function they live in.
 ;
 
-define i32 @testfunc()
-begin
+define i32 @testfunc() {
 	ret i32* null
-end
+}
diff --git a/test/lit.cfg b/test/lit.cfg
index 9a2f74c..ef56473 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -306,3 +306,6 @@ else:
 
 if loadable_module:
     config.available_features.add('loadable_module')
+
+if config.enable_assertions:
+    config.available_features.add('asserts')
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index a61920f..6c33831 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -5,9 +5,9 @@ config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
 config.llvmgcc_dir = "@LLVMGCCDIR@"
 config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
-config.llvm_build_modes = "@LLVM_BUILD_MODE@".split('+')
 config.python_executable = "@PYTHON_EXECUTABLE@"
 config.enable_shared = @ENABLE_SHARED@
+config.enable_assertions = @LLVM_ENABLE_ASSERTIONS@
 
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index 593765c..e22841a 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -175,13 +175,16 @@ void llvm::DeleteFunctionBody(Function *F) {
 static Constant *GetTorInit(std::vector<std::pair<Function*, int> > &TorList) {
   assert(!TorList.empty() && "Don't create empty tor list!");
   std::vector<Constant*> ArrayElts;
+  const Type *Int32Ty = Type::getInt32Ty(TorList[0].first->getContext());
+  
+  const StructType *STy =
+    StructType::get(Int32Ty, TorList[0].first->getType(), NULL);
   for (unsigned i = 0, e = TorList.size(); i != e; ++i) {
-    std::vector<Constant*> Elts;
-    Elts.push_back(ConstantInt::get(
-          Type::getInt32Ty(TorList[i].first->getContext()), TorList[i].second));
-    Elts.push_back(TorList[i].first);
-    ArrayElts.push_back(ConstantStruct::get(TorList[i].first->getContext(),
-                                            Elts, false));
+    Constant *Elts[] = {
+      ConstantInt::get(Int32Ty, TorList[i].second),
+      TorList[i].first
+    };
+    ArrayElts.push_back(ConstantStruct::get(STy, Elts));
   }
   return ConstantArray::get(ArrayType::get(ArrayElts[0]->getType(), 
                                            ArrayElts.size()),
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 9a92671..6310d8b 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -230,23 +230,17 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
     case bitc::FUNC_CODE_INST_UNREACHABLE:  return "INST_UNREACHABLE";
 
     case bitc::FUNC_CODE_INST_PHI:          return "INST_PHI";
-    case bitc::FUNC_CODE_INST_MALLOC:       return "INST_MALLOC";
-    case bitc::FUNC_CODE_INST_FREE:         return "INST_FREE";
     case bitc::FUNC_CODE_INST_ALLOCA:       return "INST_ALLOCA";
     case bitc::FUNC_CODE_INST_LOAD:         return "INST_LOAD";
-    case bitc::FUNC_CODE_INST_STORE:        return "INST_STORE";
-    case bitc::FUNC_CODE_INST_CALL:         return "INST_CALL";
     case bitc::FUNC_CODE_INST_VAARG:        return "INST_VAARG";
-    case bitc::FUNC_CODE_INST_STORE2:       return "INST_STORE2";
-    case bitc::FUNC_CODE_INST_GETRESULT:    return "INST_GETRESULT";
+    case bitc::FUNC_CODE_INST_STORE:        return "INST_STORE";
     case bitc::FUNC_CODE_INST_EXTRACTVAL:   return "INST_EXTRACTVAL";
     case bitc::FUNC_CODE_INST_INSERTVAL:    return "INST_INSERTVAL";
     case bitc::FUNC_CODE_INST_CMP2:         return "INST_CMP2";
     case bitc::FUNC_CODE_INST_VSELECT:      return "INST_VSELECT";
-    case bitc::FUNC_CODE_DEBUG_LOC:         return "DEBUG_LOC";
     case bitc::FUNC_CODE_DEBUG_LOC_AGAIN:   return "DEBUG_LOC_AGAIN";
-    case bitc::FUNC_CODE_INST_CALL2:        return "INST_CALL2";
-    case bitc::FUNC_CODE_DEBUG_LOC2:        return "DEBUG_LOC2";
+    case bitc::FUNC_CODE_INST_CALL:         return "INST_CALL";
+    case bitc::FUNC_CODE_DEBUG_LOC:         return "DEBUG_LOC";
     }
   case bitc::TYPE_SYMTAB_BLOCK_ID:
     switch (CodeID) {
@@ -262,22 +256,17 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
   case bitc::METADATA_ATTACHMENT_ID:
     switch(CodeID) {
     default:return 0;
-    case bitc::METADATA_ATTACHMENT:  return "METADATA_ATTACHMENT";
-    case bitc::METADATA_ATTACHMENT2: return "METADATA_ATTACHMENT2";
+    case bitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT";
     }
   case bitc::METADATA_BLOCK_ID:
     switch(CodeID) {
     default:return 0;
     case bitc::METADATA_STRING:      return "METADATA_STRING";
+    case bitc::METADATA_NAME:        return "METADATA_NAME";
+    case bitc::METADATA_KIND:        return "METADATA_KIND";
     case bitc::METADATA_NODE:        return "METADATA_NODE";
     case bitc::METADATA_FN_NODE:     return "METADATA_FN_NODE";
-    case bitc::METADATA_NAME:        return "METADATA_NAME";
     case bitc::METADATA_NAMED_NODE:  return "METADATA_NAMED_NODE";
-    case bitc::METADATA_KIND:        return "METADATA_KIND";
-    case bitc::METADATA_ATTACHMENT:  return "METADATA_ATTACHMENT";
-    case bitc::METADATA_NODE2:       return "METADATA_NODE2";
-    case bitc::METADATA_FN_NODE2:    return "METADATA_FN_NODE2";
-    case bitc::METADATA_NAMED_NODE2: return "METADATA_NAMED_NODE2";
     }
   }
 }
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 7453d9e..9020a52 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -74,7 +74,7 @@ public:
     if (!V.getType()->isVoidTy()) {
       OS.PadToColumn(50);
       Padded = true;
-      OS << "; [#uses=" << V.getNumUses() << " type=" << V.getType()->getDescription() << "]";  // Output # uses and type
+      OS << "; [#uses=" << V.getNumUses() << " type=" << *V.getType() << "]";  // Output # uses and type
     }
     if (const Instruction *I = dyn_cast<Instruction>(&V)) {
       const DebugLoc &DL = I->getDebugLoc();
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index eb23a1a..077f0e6 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -97,7 +97,7 @@ IncludeDirs("I", cl::desc("Directory of include files"),
 
 static cl::opt<std::string>
 ArchName("arch", cl::desc("Target arch to assemble for, "
-                            "see -version for available targets"));
+                          "see -version for available targets"));
 
 static cl::opt<std::string>
 TripleName("triple", cl::desc("Target triple to assemble for, "
@@ -110,12 +110,11 @@ MCPU("mcpu",
      cl::init(""));
 
 static cl::opt<bool>
-NoInitialTextSection("n", cl::desc(
-                   "Don't assume assembly file starts in the text section"));
+NoInitialTextSection("n", cl::desc("Don't assume assembly file starts "
+                                   "in the text section"));
 
 static cl::opt<bool>
-SaveTempLabels("L", cl::desc(
-                 "Don't discard temporary labels"));
+SaveTempLabels("L", cl::desc("Don't discard temporary labels"));
 
 enum ActionType {
   AC_AsLex,
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index 8f2b1f4..c899555 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -208,7 +208,7 @@ void LTOModule::addObjCClass(GlobalVariable *clgv) {
     if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
       NameAndAttributes info;
       StringMap<NameAndAttributes>::value_type &entry =
-        _undefines.GetOrCreateValue(superclassName.c_str());
+        _undefines.GetOrCreateValue(superclassName);
       if (!entry.getValue().name) {
         const char *symbolName = entry.getKey().data();
         info.name = symbolName;
@@ -220,7 +220,7 @@ void LTOModule::addObjCClass(GlobalVariable *clgv) {
     std::string className;
     if (objcClassNameFromExpression(c->getOperand(2), className)) {
       StringSet::value_type &entry =
-        _defines.GetOrCreateValue(className.c_str());
+        _defines.GetOrCreateValue(className);
       entry.setValue(1);
       NameAndAttributes info;
       info.name = entry.getKey().data();
@@ -243,7 +243,7 @@ void LTOModule::addObjCCategory(GlobalVariable *clgv) {
       NameAndAttributes info;
 
       StringMap<NameAndAttributes>::value_type &entry =
-        _undefines.GetOrCreateValue(targetclassName.c_str());
+        _undefines.GetOrCreateValue(targetclassName);
 
       if (entry.getValue().name)
         return;
@@ -264,7 +264,7 @@ void LTOModule::addObjCClassRef(GlobalVariable *clgv) {
     NameAndAttributes info;
 
     StringMap<NameAndAttributes>::value_type &entry =
-      _undefines.GetOrCreateValue(targetclassName.c_str());
+      _undefines.GetOrCreateValue(targetclassName);
     if (entry.getValue().name)
       return;
 
@@ -375,7 +375,7 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
 
   // add to table of symbols
   NameAndAttributes info;
-  StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer.c_str());
+  StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer);
   entry.setValue(1);
 
   StringRef Name = entry.getKey();
@@ -436,7 +436,7 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
   mangler.getNameWithPrefix(name, decl, false);
 
   StringMap<NameAndAttributes>::value_type &entry =
-    _undefines.GetOrCreateValue(name.c_str());
+    _undefines.GetOrCreateValue(name);
 
   // we already have the symbol
   if (entry.getValue().name)
diff --git a/unittests/Support/ConstantRangeTest.cpp b/unittests/Support/ConstantRangeTest.cpp
index 161e2cf..742bcb4 100644
--- a/unittests/Support/ConstantRangeTest.cpp
+++ b/unittests/Support/ConstantRangeTest.cpp
@@ -299,6 +299,8 @@ TEST_F(ConstantRangeTest, Sub) {
   EXPECT_EQ(Empty.sub(APInt(16, 4)), Empty);
   EXPECT_EQ(Some.sub(APInt(16, 4)),
             ConstantRange(APInt(16, 0x6), APInt(16, 0xaa6)));
+  EXPECT_EQ(Some.sub(Some),
+            ConstantRange(APInt(16, 0xf561), APInt(16, 0xaa0)));
   EXPECT_EQ(Wrap.sub(APInt(16, 4)),
             ConstantRange(APInt(16, 0xaa6), APInt(16, 0x6)));
   EXPECT_EQ(One.sub(APInt(16, 4)),
diff --git a/unittests/Support/TypeBuilderTest.cpp b/unittests/Support/TypeBuilderTest.cpp
index 5a82883..bd19c65 100644
--- a/unittests/Support/TypeBuilderTest.cpp
+++ b/unittests/Support/TypeBuilderTest.cpp
@@ -231,19 +231,19 @@ public:
 namespace {
 
 TEST(TypeBuilderTest, Extensions) {
-  EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(), 
+  EXPECT_EQ(PointerType::getUnqual(StructType::get(
                                      TypeBuilder<int, false>::get(getGlobalContext()),
                                      TypeBuilder<int*, false>::get(getGlobalContext()),
                                      TypeBuilder<void*[], false>::get(getGlobalContext()),
                                      NULL)),
             (TypeBuilder<MyType*, false>::get(getGlobalContext())));
-  EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(), 
+  EXPECT_EQ(PointerType::getUnqual(StructType::get(
                                      TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
                                      TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
                                      TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
                                      NULL)),
             (TypeBuilder<MyPortableType*, false>::get(getGlobalContext())));
-  EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(), 
+  EXPECT_EQ(PointerType::getUnqual(StructType::get(
                                      TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
                                      TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
                                      TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
diff --git a/unittests/VMCore/DerivedTypesTest.cpp b/unittests/VMCore/DerivedTypesTest.cpp
index 9562157..d0ba026 100644
--- a/unittests/VMCore/DerivedTypesTest.cpp
+++ b/unittests/VMCore/DerivedTypesTest.cpp
@@ -60,7 +60,7 @@ static void PR7658() {
     v2.push_back(ConstantPointerNull::get(p4));
   }
 
-  WeakVH CS = ConstantStruct::get(ctx, v2, false); // { i32 14, opaque* null, opaque* null}
+  WeakVH CS = ConstantStruct::getAnon(ctx, v2, false); // { i32 14, opaque* null, opaque* null}
 
   StructType *s2 = StructType::get(ctx, t2);
   PATypeHolder h2(s2);
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 1d7a67b..a05867b 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -98,6 +98,7 @@
 
 #include "AsmMatcherEmitter.h"
 #include "CodeGenTarget.h"
+#include "Error.h"
 #include "Record.h"
 #include "StringMatcher.h"
 #include "llvm/ADT/OwningPtr.h"
@@ -886,7 +887,8 @@ AsmMatcherInfo::getOperandClass(const CGIOperandList::OperandInfo &OI,
 
 void AsmMatcherInfo::
 BuildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters) {
-  const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+  const std::vector<CodeGenRegister*> &Registers =
+    Target.getRegBank().getRegisters();
   const std::vector<CodeGenRegisterClass> &RegClassList =
     Target.getRegisterClasses();
 
@@ -910,9 +912,9 @@ BuildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters) {
   // a unique register set class), and build the mapping of registers to the set
   // they should classify to.
   std::map<Record*, std::set<Record*> > RegisterMap;
-  for (std::vector<CodeGenRegister>::const_iterator it = Registers.begin(),
+  for (std::vector<CodeGenRegister*>::const_iterator it = Registers.begin(),
          ie = Registers.end(); it != ie; ++it) {
-    const CodeGenRegister &CGR = *it;
+    const CodeGenRegister &CGR = **it;
     // Compute the intersection of all sets containing this register.
     std::set<Record*> ContainingSet;
 
@@ -1745,14 +1747,16 @@ static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser,
                                   raw_ostream &OS) {
   // Construct the match list.
   std::vector<StringMatcher::StringPair> Matches;
-  for (unsigned i = 0, e = Target.getRegisters().size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Target.getRegisters()[i];
-    if (Reg.TheDef->getValueAsString("AsmName").empty())
+  const std::vector<CodeGenRegister*> &Regs =
+    Target.getRegBank().getRegisters();
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+    const CodeGenRegister *Reg = Regs[i];
+    if (Reg->TheDef->getValueAsString("AsmName").empty())
       continue;
 
     Matches.push_back(StringMatcher::StringPair(
-                                        Reg.TheDef->getValueAsString("AsmName"),
-                                        "return " + utostr(i + 1) + ";"));
+                                     Reg->TheDef->getValueAsString("AsmName"),
+                                     "return " + utostr(Reg->EnumValue) + ";"));
   }
 
   OS << "static unsigned MatchRegisterName(StringRef Name) {\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 818053a..066e03d 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -14,6 +14,7 @@
 
 #include "AsmWriterEmitter.h"
 #include "AsmWriterInst.h"
+#include "Error.h"
 #include "CodeGenTarget.h"
 #include "Record.h"
 #include "StringToOffsetTable.h"
@@ -462,7 +463,8 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
   CodeGenTarget Target(Records);
   Record *AsmWriter = Target.getAsmWriter();
   std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
-  const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+  const std::vector<CodeGenRegister*> &Registers =
+    Target.getRegBank().getRegisters();
 
   StringToOffsetTable StringTable;
   O <<
@@ -476,7 +478,7 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
   << "\n"
   << "  static const unsigned RegAsmOffset[] = {";
   for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Registers[i];
+    const CodeGenRegister &Reg = *Registers[i];
 
     std::string AsmName = Reg.TheDef->getValueAsString("AsmName");
     if (AsmName.empty())
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index a24c921..957045e 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_utility(tblgen
   DAGISelMatcher.cpp
   DisassemblerEmitter.cpp
   EDEmitter.cpp
+  Error.cpp
   FastISelEmitter.cpp
   FixedLenDecoderEmitter.cpp
   InstrEnumEmitter.cpp
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index a08cde6..1930a96 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenDAGPatterns.h"
+#include "Error.h"
 #include "Record.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 73fe916..e9557d2 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -13,6 +13,7 @@
 
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
+#include "Error.h"
 #include "Record.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 37952fc..1acf3a8 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -14,6 +14,7 @@
 
 #include "CodeGenRegisters.h"
 #include "CodeGenTarget.h"
+#include "Error.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 
@@ -151,6 +152,105 @@ CodeGenRegister::addSubRegsPreOrder(SetVector<CodeGenRegister*> &OSet) const {
 }
 
 //===----------------------------------------------------------------------===//
+//                               RegisterTuples
+//===----------------------------------------------------------------------===//
+
+// A RegisterTuples def is used to generate pseudo-registers from lists of
+// sub-registers. We provide a SetTheory expander class that returns the new
+// registers.
+namespace {
+struct TupleExpander : SetTheory::Expander {
+  void expand(SetTheory &ST, Record *Def, SetTheory::RecSet &Elts) {
+    std::vector<Record*> Indices = Def->getValueAsListOfDefs("SubRegIndices");
+    unsigned Dim = Indices.size();
+    ListInit *SubRegs = Def->getValueAsListInit("SubRegs");
+    if (Dim != SubRegs->getSize())
+      throw TGError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch");
+    if (Dim < 2)
+      throw TGError(Def->getLoc(), "Tuples must have at least 2 sub-registers");
+
+    // Evaluate the sub-register lists to be zipped.
+    unsigned Length = ~0u;
+    SmallVector<SetTheory::RecSet, 4> Lists(Dim);
+    for (unsigned i = 0; i != Dim; ++i) {
+      ST.evaluate(SubRegs->getElement(i), Lists[i]);
+      Length = std::min(Length, unsigned(Lists[i].size()));
+    }
+
+    if (Length == 0)
+      return;
+
+    // Precompute some types.
+    Record *RegisterCl = Def->getRecords().getClass("Register");
+    RecTy *RegisterRecTy = new RecordRecTy(RegisterCl);
+    StringInit *BlankName = new StringInit("");
+
+    // Zip them up.
+    for (unsigned n = 0; n != Length; ++n) {
+      std::string Name;
+      Record *Proto = Lists[0][n];
+      std::vector<Init*> Tuple;
+      unsigned CostPerUse = 0;
+      for (unsigned i = 0; i != Dim; ++i) {
+        Record *Reg = Lists[i][n];
+        if (i) Name += '_';
+        Name += Reg->getName();
+        Tuple.push_back(new DefInit(Reg));
+        CostPerUse = std::max(CostPerUse,
+                              unsigned(Reg->getValueAsInt("CostPerUse")));
+      }
+
+      // Create a new Record representing the synthesized register. This record
+      // is only for consumption by CodeGenRegister, it is not added to the
+      // RecordKeeper.
+      Record *NewReg = new Record(Name, Def->getLoc(), Def->getRecords());
+      Elts.insert(NewReg);
+
+      // Copy Proto super-classes.
+      for (unsigned i = 0, e = Proto->getSuperClasses().size(); i != e; ++i)
+        NewReg->addSuperClass(Proto->getSuperClasses()[i]);
+
+      // Copy Proto fields.
+      for (unsigned i = 0, e = Proto->getValues().size(); i != e; ++i) {
+        RecordVal RV = Proto->getValues()[i];
+
+        // Replace the sub-register list with Tuple.
+        if (RV.getName() == "SubRegs")
+          RV.setValue(new ListInit(Tuple, RegisterRecTy));
+
+        // Provide a blank AsmName. MC hacks are required anyway.
+        if (RV.getName() == "AsmName")
+          RV.setValue(BlankName);
+
+        // CostPerUse is aggregated from all Tuple members.
+        if (RV.getName() == "CostPerUse")
+          RV.setValue(new IntInit(CostPerUse));
+
+        // Copy fields from the RegisterTuples def.
+        if (RV.getName() == "SubRegIndices" ||
+            RV.getName() == "CompositeIndices") {
+          NewReg->addValue(*Def->getValue(RV.getName()));
+          continue;
+        }
+
+        // Some fields get their default uninitialized value.
+        if (RV.getName() == "DwarfNumbers" ||
+            RV.getName() == "DwarfAlias" ||
+            RV.getName() == "Aliases") {
+          if (const RecordVal *DefRV = RegisterCl->getValue(RV.getName()))
+            NewReg->addValue(*DefRV);
+          continue;
+        }
+
+        // Everything else is copied from Proto.
+        NewReg->addValue(RV);
+      }
+    }
+  }
+};
+}
+
+//===----------------------------------------------------------------------===//
 //                            CodeGenRegisterClass
 //===----------------------------------------------------------------------===//
 
@@ -172,10 +272,28 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
   }
   assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!");
 
+  // Default allocation order always contains all registers.
   Elements = RegBank.getSets().expand(R);
   for (unsigned i = 0, e = Elements->size(); i != e; ++i)
     Members.insert(RegBank.getReg((*Elements)[i]));
 
+  // Alternative allocation orders may be subsets.
+  ListInit *Alts = R->getValueAsListInit("AltOrders");
+  AltOrders.resize(Alts->size());
+  SetTheory::RecSet Order;
+  for (unsigned i = 0, e = Alts->size(); i != e; ++i) {
+    RegBank.getSets().evaluate(Alts->getElement(i), Order);
+    AltOrders[i].append(Order.begin(), Order.end());
+    // Verify that all altorder members are regclass members.
+    while (!Order.empty()) {
+      CodeGenRegister *Reg = RegBank.getReg(Order.back());
+      Order.pop_back();
+      if (!contains(Reg))
+        throw TGError(R->getLoc(), " AltOrder register " + Reg->getName() +
+                      " is not a class member");
+    }
+  }
+
   // SubRegClasses is a list<dag> containing (RC, subregindex, ...) dags.
   ListInit *SRC = R->getValueAsListInit("SubRegClasses");
   for (ListInit::const_iterator i = SRC->begin(), e = SRC->end(); i != e; ++i) {
@@ -207,8 +325,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
   SpillAlignment = R->getValueAsInt("Alignment");
   CopyCost = R->getValueAsInt("CopyCost");
   Allocatable = R->getValueAsBit("isAllocatable");
-  MethodBodies = R->getValueAsCode("MethodBodies");
-  MethodProtos = R->getValueAsCode("MethodProtos");
+  AltOrderSelect = R->getValueAsCode("AltOrderSelect");
 }
 
 bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const {
@@ -228,7 +345,8 @@ bool CodeGenRegisterClass::hasSubClass(const CodeGenRegisterClass *RC) const {
   return SpillAlignment && RC->SpillAlignment % SpillAlignment == 0 &&
     SpillSize <= RC->SpillSize &&
     std::includes(Members.begin(), Members.end(),
-                  RC->Members.begin(), RC->Members.end());
+                  RC->Members.begin(), RC->Members.end(),
+                  CodeGenRegister::Less());
 }
 
 const std::string &CodeGenRegisterClass::getName() const {
@@ -240,8 +358,9 @@ const std::string &CodeGenRegisterClass::getName() const {
 //===----------------------------------------------------------------------===//
 
 CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
-  // Configure register Sets to understand register classes.
+  // Configure register Sets to understand register classes and tuples.
   Sets.addFieldExpander("RegisterClass", "MemberList");
+  Sets.addExpander("RegisterTuples", new TupleExpander());
 
   // Read in the user-defined (named) sub-register indices.
   // More indices will be synthesized later.
@@ -255,7 +374,16 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
   Registers.reserve(Regs.size());
   // Assign the enumeration values.
   for (unsigned i = 0, e = Regs.size(); i != e; ++i)
-    Registers.push_back(CodeGenRegister(Regs[i], i + 1));
+    getReg(Regs[i]);
+
+  // Expand tuples and number the new registers.
+  std::vector<Record*> Tups =
+    Records.getAllDerivedDefinitions("RegisterTuples");
+  for (unsigned i = 0, e = Tups.size(); i != e; ++i) {
+    const std::vector<Record*> *TupRegs = Sets.expand(Tups[i]);
+    for (unsigned j = 0, je = TupRegs->size(); j != je; ++j)
+      getReg((*TupRegs)[j]);
+  }
 
   // Read in register class definitions.
   std::vector<Record*> RCs = Records.getAllDerivedDefinitions("RegisterClass");
@@ -268,14 +396,12 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
 }
 
 CodeGenRegister *CodeGenRegBank::getReg(Record *Def) {
-  if (Def2Reg.empty())
-    for (unsigned i = 0, e = Registers.size(); i != e; ++i)
-      Def2Reg[Registers[i].TheDef] = &Registers[i];
-
-  if (CodeGenRegister *Reg = Def2Reg[Def])
+  CodeGenRegister *&Reg = Def2Reg[Def];
+  if (Reg)
     return Reg;
-
-  throw TGError(Def->getLoc(), "Not a known Register!");
+  Reg = new CodeGenRegister(Def, Registers.size() + 1);
+  Registers.push_back(Reg);
+  return Reg;
 }
 
 CodeGenRegisterClass *CodeGenRegBank::getRegClass(Record *Def) {
@@ -315,10 +441,10 @@ void CodeGenRegBank::computeComposites() {
   // Precompute all sub-register maps. This will create Composite entries for
   // all inferred sub-register indices.
   for (unsigned i = 0, e = Registers.size(); i != e; ++i)
-    Registers[i].getSubRegs(*this);
+    Registers[i]->getSubRegs(*this);
 
   for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
-    CodeGenRegister *Reg1 = &Registers[i];
+    CodeGenRegister *Reg1 = Registers[i];
     const CodeGenRegister::SubRegMap &SRM1 = Reg1->getSubRegs();
     for (CodeGenRegister::SubRegMap::const_iterator i1 = SRM1.begin(),
          e1 = SRM1.end(); i1 != e1; ++i1) {
@@ -404,7 +530,7 @@ computeOverlaps(std::map<const CodeGenRegister*, CodeGenRegister::Set> &Map) {
 
   // Collect overlaps that don't follow from rule 2.
   for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
-    CodeGenRegister *Reg = &Registers[i];
+    CodeGenRegister *Reg = Registers[i];
     CodeGenRegister::Set &Overlaps = Map[Reg];
 
     // Reg overlaps itself.
@@ -430,7 +556,7 @@ computeOverlaps(std::map<const CodeGenRegister*, CodeGenRegister::Set> &Map) {
 
   // Apply rule 2. and inherit all sub-register overlaps.
   for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
-    CodeGenRegister *Reg = &Registers[i];
+    CodeGenRegister *Reg = Registers[i];
     CodeGenRegister::Set &Overlaps = Map[Reg];
     const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs();
     for (CodeGenRegister::SubRegMap::const_iterator i2 = SRM.begin(),
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 55f0b9b..5edbf47 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -86,6 +86,7 @@ namespace llvm {
   class CodeGenRegisterClass {
     CodeGenRegister::Set Members;
     const std::vector<Record*> *Elements;
+    std::vector<SmallVector<Record*, 16> > AltOrders;
   public:
     Record *TheDef;
     std::string Namespace;
@@ -96,7 +97,7 @@ namespace llvm {
     bool Allocatable;
     // Map SubRegIndex -> RegisterClass
     DenseMap<Record*,Record*> SubRegClasses;
-    std::string MethodProtos, MethodBodies;
+    std::string AltOrderSelect;
 
     const std::string &getName() const;
     const std::vector<MVT::SimpleValueType> &getValueTypes() const {return VTs;}
@@ -125,10 +126,17 @@ namespace llvm {
 
     // Returns an ordered list of class members.
     // The order of registers is the same as in the .td file.
-    ArrayRef<Record*> getOrder() const {
-      return *Elements;
+    // No = 0 is the default allocation order, No = 1 is the first alternative.
+    ArrayRef<Record*> getOrder(unsigned No = 0) const {
+      if (No == 0)
+        return *Elements;
+      else
+        return AltOrders[No - 1];
     }
 
+    // Return the total number of allocation orders available.
+    unsigned getNumOrders() const { return 1 + AltOrders.size(); }
+
     CodeGenRegisterClass(CodeGenRegBank&, Record *R);
   };
 
@@ -140,7 +148,7 @@ namespace llvm {
 
     std::vector<Record*> SubRegIndices;
     unsigned NumNamedIndices;
-    std::vector<CodeGenRegister> Registers;
+    std::vector<CodeGenRegister*> Registers;
     DenseMap<Record*, CodeGenRegister*> Def2Reg;
 
     std::vector<CodeGenRegisterClass> RegClasses;
@@ -171,7 +179,7 @@ namespace llvm {
     // Find or create a sub-register index representing the A+B composition.
     Record *getCompositeSubRegIndex(Record *A, Record *B, bool create = false);
 
-    const std::vector<CodeGenRegister> &getRegisters() { return Registers; }
+    const std::vector<CodeGenRegister*> &getRegisters() { return Registers; }
 
     // Find a register from its Record def.
     CodeGenRegister *getReg(Record*);
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 4ce8022..5b0b315 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -167,12 +167,10 @@ CodeGenRegBank &CodeGenTarget::getRegBank() const {
 /// getRegisterByName - If there is a register with the specific AsmName,
 /// return it.
 const CodeGenRegister *CodeGenTarget::getRegisterByName(StringRef Name) const {
-  const std::vector<CodeGenRegister> &Regs = getRegBank().getRegisters();
-  for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Regs[i];
-    if (Reg.TheDef->getValueAsString("AsmName") == Name)
-      return &Reg;
-  }
+  const std::vector<CodeGenRegister*> &Regs = getRegBank().getRegisters();
+  for (unsigned i = 0, e = Regs.size(); i != e; ++i)
+    if (Regs[i]->TheDef->getValueAsString("AsmName") == Name)
+      return Regs[i];
 
   return 0;
 }
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 2516515..9bedb9c 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -96,10 +96,6 @@ public:
   /// getRegBank - Return the register bank description.
   CodeGenRegBank &getRegBank() const;
 
-  const std::vector<CodeGenRegister> &getRegisters() const {
-    return getRegBank().getRegisters();
-  }
-
   /// getRegisterByName - If there is a register with the specific AsmName,
   /// return it.
   const CodeGenRegister *getRegisterByName(StringRef Name) const;
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index a8736fa..54553a8 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -93,10 +93,6 @@ namespace {
     /// CurPredicate - As we emit matcher nodes, this points to the latest check
     /// which should have future checks stuck into its Next position.
     Matcher *CurPredicate;
-
-    /// RegisterDefMap - A map of register record definitions to the
-    /// corresponding target CodeGenRegister entry.
-    DenseMap<const Record *, const CodeGenRegister *> RegisterDefMap;
   public:
     MatcherGen(const PatternToMatch &pattern, const CodeGenDAGPatterns &cgp);
 
@@ -165,12 +161,6 @@ MatcherGen::MatcherGen(const PatternToMatch &pattern,
 
   // If there are types that are manifestly known, infer them.
   InferPossibleTypes();
-
-  // Populate the map from records to CodeGenRegister entries.
-  const CodeGenTarget &CGT = CGP.getTargetInfo();
-  const std::vector<CodeGenRegister> &Registers = CGT.getRegisters();
-  for (unsigned i = 0, e = Registers.size(); i != e; ++i)
-    RegisterDefMap[Registers[i].TheDef] = &Registers[i];
 }
 
 /// InferPossibleTypes - As we emit the pattern, we end up generating type
@@ -590,8 +580,9 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
   // If this is an explicit register reference, handle it.
   if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) {
     if (DI->getDef()->isSubClassOf("Register")) {
-      AddMatcher(new EmitRegisterMatcher(RegisterDefMap[DI->getDef()],
-                                         N->getType(0)));
+      const CodeGenRegister *Reg =
+        CGP.getTargetInfo().getRegBank().getReg(DI->getDef());
+      AddMatcher(new EmitRegisterMatcher(Reg, N->getType(0)));
       ResultOps.push_back(NextRecordedOperandNo++);
       return;
     }
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index d68d3b0..07313d1 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -9,6 +9,7 @@
 
 #include "DisassemblerEmitter.h"
 #include "CodeGenTarget.h"
+#include "Error.h"
 #include "Record.h"
 #include "X86DisassemblerTables.h"
 #include "X86RecognizableInstr.h"
diff --git a/utils/TableGen/Error.cpp b/utils/TableGen/Error.cpp
new file mode 100644
index 0000000..3f6cda8
--- /dev/null
+++ b/utils/TableGen/Error.cpp
@@ -0,0 +1,39 @@
+//===- Error.cpp - tblgen error handling helper routines --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains error handling helper routines to pretty-print diagnostic
+// messages from tblgen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Error.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+SourceMgr SrcMgr;
+
+void PrintError(SMLoc ErrorLoc, const Twine &Msg) {
+  SrcMgr.PrintMessage(ErrorLoc, Msg, "error");
+}
+
+void PrintError(const char *Loc, const Twine &Msg) {
+  SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
+}
+
+void PrintError(const Twine &Msg) {
+  errs() << "error:" << Msg << "\n";
+}
+
+void PrintError(const TGError &Error) {
+  PrintError(Error.getLoc(), Error.getMessage());
+}
+
+} // end namespace llvm
diff --git a/utils/TableGen/Error.h b/utils/TableGen/Error.h
new file mode 100644
index 0000000..b3a0146
--- /dev/null
+++ b/utils/TableGen/Error.h
@@ -0,0 +1,43 @@
+//===- Error.h - tblgen error handling helper routines ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains error handling helper routines to pretty-print diagnostic
+// messages from tblgen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ERROR_H
+#define ERROR_H
+
+#include "llvm/Support/SourceMgr.h"
+
+namespace llvm {
+
+class TGError {
+  SMLoc Loc;
+  std::string Message;
+public:
+  TGError(SMLoc loc, const std::string &message) : Loc(loc), Message(message) {}
+
+  SMLoc getLoc() const { return Loc; }
+  const std::string &getMessage() const { return Message; }
+};
+
+void PrintError(SMLoc ErrorLoc, const Twine &Msg);
+void PrintError(const char *Loc, const Twine &Msg);
+void PrintError(const Twine &Msg);
+void PrintError(const TGError &Error);
+
+
+extern SourceMgr SrcMgr;
+
+
+} // end namespace "llvm"
+
+#endif
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 6c2a767..f946ac7 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -18,6 +18,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "FastISelEmitter.h"
+#include "Error.h"
 #include "Record.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/VectorExtras.h"
@@ -406,15 +407,7 @@ static std::string PhyRegForNode(TreePatternNode *Op,
   PhysReg += static_cast<StringInit*>(OpLeafRec->getValue( \
              "Namespace")->getValue())->getValue();
   PhysReg += "::";
-
-  std::vector<CodeGenRegister> Regs = Target.getRegisters();
-  for (unsigned i = 0; i < Regs.size(); ++i) {
-    if (Regs[i].TheDef == OpLeafRec) {
-      PhysReg += Regs[i].getName();
-      break;
-    }
-  }
-
+  PhysReg += Target.getRegBank().getReg(OpLeafRec)->getName();
   return PhysReg;
 }
 
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 39eb3bd..7a53138 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -214,7 +214,7 @@ static void EmitTypeGenerate(raw_ostream &OS,
   if (ArgTypes.size() == 1)
     return EmitTypeGenerate(OS, ArgTypes.front(), ArgNo);
 
-  OS << "StructType::get(Context, ";
+  OS << "StructType::get(";
 
   for (std::vector<Record*>::const_iterator
          I = ArgTypes.begin(), E = ArgTypes.end(); I != E; ++I) {
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index 23fdbde..ef97744 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -24,6 +24,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "NeonEmitter.h"
+#include "Error.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index 8ac8cd9..3c750da 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Record.h"
+#include "Error.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Format.h"
 #include "llvm/ADT/StringExtras.h"
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 522b719..c8905cc 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -1486,22 +1486,8 @@ struct LessRecordFieldName {
   }
 };
 
-
-class TGError {
-  SMLoc Loc;
-  std::string Message;
-public:
-  TGError(SMLoc loc, const std::string &message) : Loc(loc), Message(message) {}
-
-  SMLoc getLoc() const { return Loc; }
-  const std::string &getMessage() const { return Message; }
-};
-
-
 raw_ostream &operator<<(raw_ostream &OS, const RecordKeeper &RK);
 
-void PrintError(SMLoc ErrorLoc, const Twine &Msg);
-
 } // End llvm namespace
 
 #endif
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 9ffb66a..991f34c 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -28,9 +28,9 @@ using namespace llvm;
 void RegisterInfoEmitter::runEnums(raw_ostream &OS) {
   CodeGenTarget Target(Records);
   CodeGenRegBank &Bank = Target.getRegBank();
-  const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+  const std::vector<CodeGenRegister*> &Registers = Bank.getRegisters();
 
-  std::string Namespace = Registers[0].TheDef->getValueAsString("Namespace");
+  std::string Namespace = Registers[0]->TheDef->getValueAsString("Namespace");
 
   EmitSourceFileHeader("Target Register Enum Values", OS);
   OS << "namespace llvm {\n\n";
@@ -40,9 +40,9 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS) {
   OS << "enum {\n  NoRegister,\n";
 
   for (unsigned i = 0, e = Registers.size(); i != e; ++i)
-    OS << "  " << Registers[i].getName() << " = " <<
-      Registers[i].EnumValue << ",\n";
-  assert(Registers.size() == Registers[Registers.size()-1].EnumValue &&
+    OS << "  " << Registers[i]->getName() << " = " <<
+      Registers[i]->EnumValue << ",\n";
+  assert(Registers.size() == Registers[Registers.size()-1]->EnumValue &&
          "Register enum value mismatch!");
   OS << "  NUM_TARGET_REGS \t// " << Registers.size()+1 << "\n";
   OS << "};\n";
@@ -108,12 +108,16 @@ void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
     OS << "\n  };\n\n";
 
     for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
-      const std::string &Name = RegisterClasses[i].getName();
+      const CodeGenRegisterClass &RC = RegisterClasses[i];
+      const std::string &Name = RC.getName();
 
       // Output the register class definition.
       OS << "  struct " << Name << "Class : public TargetRegisterClass {\n"
-         << "    " << Name << "Class();\n"
-         << RegisterClasses[i].MethodProtos << "  };\n";
+         << "    " << Name << "Class();\n";
+      if (!RC.AltOrderSelect.empty())
+        OS << "    ArrayRef<unsigned> "
+              "getRawAllocationOrder(const MachineFunction&) const;\n";
+      OS << "  };\n";
 
       // Output the extern for the instance.
       OS << "  extern " << Name << "Class\t" << Name << "RegClass;\n";
@@ -349,10 +353,9 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
       OS << "\n  };\n\n";
     }
 
-
+    // Emit methods.
     for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
       const CodeGenRegisterClass &RC = RegisterClasses[i];
-      OS << RC.MethodBodies << "\n";
       OS << RC.getName() << "Class::" << RC.getName()
          << "Class()  : TargetRegisterClass("
          << RC.getName() + "RegClassID" << ", "
@@ -371,6 +374,27 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
          << RC.getName() << ", " << RC.getName() << " + "
          << RC.getOrder().size()
          << ") {}\n";
+      if (!RC.AltOrderSelect.empty()) {
+        OS << "\nstatic inline unsigned " << RC.getName()
+           << "AltOrderSelect(const MachineFunction &MF) {"
+           << RC.AltOrderSelect << "}\n\nArrayRef<unsigned> "
+           << RC.getName() << "Class::"
+           << "getRawAllocationOrder(const MachineFunction &MF) const {\n";
+        for (unsigned oi = 1 , oe = RC.getNumOrders(); oi != oe; ++oi) {
+          ArrayRef<Record*> Elems = RC.getOrder(oi);
+          OS << "  static const unsigned AltOrder" << oi << "[] = {";
+          for (unsigned elem = 0; elem != Elems.size(); ++elem)
+            OS << (elem ? ", " : " ") << getQualifiedName(Elems[elem]);
+          OS << " };\n";
+        }
+        OS << "  static const ArrayRef<unsigned> Order[] = {\n"
+           << "    ArrayRef<unsigned>(" << RC.getName();
+        for (unsigned oi = 1, oe = RC.getNumOrders(); oi != oe; ++oi)
+          OS << "),\n    ArrayRef<unsigned>(AltOrder" << oi;
+        OS << ")\n  };\n  const unsigned Select = " << RC.getName()
+           << "AltOrderSelect(MF);\n  assert(Select < " << RC.getNumOrders()
+           << ");\n  return Order[Select];\n}\n";
+        }
     }
 
     OS << "}\n";
@@ -385,11 +409,11 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
 
   typedef std::map<Record*, std::vector<int64_t>, LessRecord> DwarfRegNumsMapTy;
   DwarfRegNumsMapTy DwarfRegNums;
-  const std::vector<CodeGenRegister> &Regs = Target.getRegisters();
+  const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters();
 
   // Emit an overlap list for all registers.
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    const CodeGenRegister *Reg = &Regs[i];
+    const CodeGenRegister *Reg = Regs[i];
     const CodeGenRegister::Set &O = Overlaps[Reg];
     // Move Reg to the front so TRI::getAliasSet can share the list.
     OS << "  const unsigned " << Reg->getName() << "_Overlaps[] = { "
@@ -406,7 +430,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // Loop over all of the registers which have sub-registers, emitting the
   // sub-registers list to memory.
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Regs[i];
+    const CodeGenRegister &Reg = *Regs[i];
     if (Reg.getSubRegs().empty())
      continue;
     // getSubRegs() orders by SubRegIndex. We want a topological order.
@@ -423,7 +447,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // Loop over all of the registers which have super-registers, emitting the
   // super-registers list to memory.
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Regs[i];
+    const CodeGenRegister &Reg = *Regs[i];
     const CodeGenRegister::SuperRegList &SR = Reg.getSuperRegs();
     if (SR.empty())
       continue;
@@ -439,7 +463,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // Now that register alias and sub-registers sets have been emitted, emit the
   // register descriptors now.
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    const CodeGenRegister &Reg = Regs[i];
+    const CodeGenRegister &Reg = *Regs[i];
     OS << "    { \"";
     OS << Reg.getName() << "\",\t" << Reg.getName() << "_Overlaps,\t";
     if (!Reg.getSubRegs().empty())
@@ -490,10 +514,10 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
      << "  switch (RegNo) {\n"
      << "  default:\n    return 0;\n";
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    const CodeGenRegister::SubRegMap &SRM = Regs[i].getSubRegs();
+    const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
     if (SRM.empty())
       continue;
-    OS << "  case " << getQualifiedName(Regs[i].TheDef) << ":\n";
+    OS << "  case " << getQualifiedName(Regs[i]->TheDef) << ":\n";
     OS << "    switch (Index) {\n";
     OS << "    default: return 0;\n";
     for (CodeGenRegister::SubRegMap::const_iterator ii = SRM.begin(),
@@ -511,10 +535,10 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
      << "  switch (RegNo) {\n"
      << "  default:\n    return 0;\n";
    for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-     const CodeGenRegister::SubRegMap &SRM = Regs[i].getSubRegs();
+     const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
      if (SRM.empty())
        continue;
-    OS << "  case " << getQualifiedName(Regs[i].TheDef) << ":\n";
+    OS << "  case " << getQualifiedName(Regs[i]->TheDef) << ":\n";
     for (CodeGenRegister::SubRegMap::const_iterator ii = SRM.begin(),
          ie = SRM.end(); ii != ie; ++ii)
       OS << "    if (SubRegNo == " << getQualifiedName(ii->second->TheDef)
@@ -563,7 +587,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   // First, just pull all provided information to the map
   unsigned maxLength = 0;
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    Record *Reg = Regs[i].TheDef;
+    Record *Reg = Regs[i]->TheDef;
     std::vector<int64_t> RegNums = Reg->getValueAsListOfInts("DwarfNumbers");
     maxLength = std::max((size_t)maxLength, RegNums.size());
     if (DwarfRegNums.count(Reg))
@@ -606,7 +630,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
   OS << "  };\n}\n\n";
 
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    Record *Reg = Regs[i].TheDef;
+    Record *Reg = Regs[i]->TheDef;
     const RecordVal *V = Reg->getValue("DwarfAlias");
     if (!V || !V->getValue())
       continue;
diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp
index 509d2f3..21ac09c 100644
--- a/utils/TableGen/SetTheory.cpp
+++ b/utils/TableGen/SetTheory.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SetTheory.h"
+#include "Error.h"
 #include "Record.h"
 #include "llvm/Support/Format.h"
 
diff --git a/utils/TableGen/TGLexer.cpp b/utils/TableGen/TGLexer.cpp
index 572c36d..b4b90ff 100644
--- a/utils/TableGen/TGLexer.cpp
+++ b/utils/TableGen/TGLexer.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "TGLexer.h"
+#include "Error.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Config/config.h"
@@ -35,7 +36,6 @@ SMLoc TGLexer::getLoc() const {
   return SMLoc::getFromPointer(TokStart);
 }
 
-
 /// ReturnError - Set the error to the specified string at the specified
 /// location.  This is defined to always return tgtok::Error.
 tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
@@ -43,16 +43,6 @@ tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
   return tgtok::Error;
 }
 
-
-void TGLexer::PrintError(const char *Loc, const Twine &Msg) const {
-  SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
-}
-
-void TGLexer::PrintError(SMLoc Loc, const Twine &Msg) const {
-  SrcMgr.PrintMessage(Loc, Msg, "error");
-}
-
-
 int TGLexer::getNextChar() {
   char CurChar = *CurPtr++;
   switch (CurChar) {
diff --git a/utils/TableGen/TGLexer.h b/utils/TableGen/TGLexer.h
index c2a6453..84d328b 100644
--- a/utils/TableGen/TGLexer.h
+++ b/utils/TableGen/TGLexer.h
@@ -101,9 +101,6 @@ public:
   }
 
   SMLoc getLoc() const;
-
-  void PrintError(const char *Loc, const Twine &Msg) const;
-  void PrintError(SMLoc Loc, const Twine &Msg) const;
   
 private:
   /// LexToken - Read the next token and return its code.
diff --git a/utils/TableGen/TGParser.h b/utils/TableGen/TGParser.h
index 419a99b..94a1c2b 100644
--- a/utils/TableGen/TGParser.h
+++ b/utils/TableGen/TGParser.h
@@ -15,6 +15,7 @@
 #define TGPARSER_H
 
 #include "TGLexer.h"
+#include "Error.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/SourceMgr.h"
 #include <map>
@@ -60,7 +61,7 @@ public:
   bool ParseFile();
   
   bool Error(SMLoc L, const Twine &Msg) const {
-    Lex.PrintError(L, Msg);
+    PrintError(L, Msg);
     return true;
   }
   bool TokError(const Twine &Msg) const {
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 4e4da36..39fe993 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -26,6 +26,7 @@
 #include "DAGISelEmitter.h"
 #include "DisassemblerEmitter.h"
 #include "EDEmitter.h"
+#include "Error.h"
 #include "FastISelEmitter.h"
 #include "InstrEnumEmitter.h"
 #include "InstrInfoEmitter.h"
@@ -194,12 +195,6 @@ namespace {
 }
 
 
-static SourceMgr SrcMgr;
-
-void llvm::PrintError(SMLoc ErrorLoc, const Twine &Msg) {
-  SrcMgr.PrintMessage(ErrorLoc, Msg, "error");
-}
-
 int main(int argc, char **argv) {
   RecordKeeper Records;
 
@@ -403,13 +398,11 @@ int main(int argc, char **argv) {
     return 0;
 
   } catch (const TGError &Error) {
-    errs() << argv[0] << ": error:\n";
-    PrintError(Error.getLoc(), Error.getMessage());
-
+    PrintError(Error);
   } catch (const std::string &Error) {
-    errs() << argv[0] << ": " << Error << "\n";
+    PrintError(Error);
   } catch (const char *Error) {
-    errs() << argv[0] << ": " << Error << "\n";
+    PrintError(Error);
   } catch (...) {
     errs() << argv[0] << ": Unknown unexpected exception occurred.\n";
   }
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 83603cc..80d0ba1 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -473,11 +473,9 @@ def parseIntegratedTestScript(test, normalize_slashes=False):
     if script[-1][-1] == '\\':
         return (Test.UNRESOLVED, "Test has unterminated run lines (with '\\')")
 
-    # Check that we have the required features or build modes:
+    # Check that we have the required features:
     missing_required_features = [f for f in requires
-                                 if f not in test.config.available_features
-                                 and f not in test.config.llvm_build_modes]
-
+                                 if f not in test.config.available_features]
     if missing_required_features:
         msg = ', '.join(missing_required_features)
         return (Test.UNSUPPORTED,
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index 2d8d3d0..25bb341 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -74,7 +74,6 @@ class TestingConfig:
 
     def clone(self, path):
         # FIXME: Chain implementations?
-        # See attribute chaining in finish()
         #
         # FIXME: Allow extra parameters?
         cfg = TestingConfig(self, self.name, self.suffixes, self.test_format,
@@ -102,9 +101,3 @@ class TestingConfig:
             # files. Should we distinguish them?
             self.test_source_root = str(self.test_source_root)
         self.excludes = set(self.excludes)
-
-        # chain attributes by copying them
-        if self.parent:
-            for k,v in vars(self.parent).items():
-                if not hasattr(self, k):
-                    setattr(self, k, v)