85 files changed, 1390 insertions, 4255 deletions
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 9d4543d..3ffcfdc 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -42,248 +42,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   Module *M = F->getParent();
   switch (Name[5]) {
   default: break;
-  case 'a':
-    // This upgrades the llvm.atomic.lcs, llvm.atomic.las, llvm.atomic.lss,
-    // and atomics with default address spaces to their new names to their new
-    // function name (e.g. llvm.atomic.add.i32 => llvm.atomic.add.i32.p0i32)
-    if (Name.compare(5,7,"atomic.",7) == 0) {
-      if (Name.compare(12,3,"lcs",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.cmp.swap" + Name.substr(delim) +
-                   ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.compare(12,3,"las",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.load.add"+Name.substr(delim)
-                   + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.compare(12,3,"lss",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.load.sub"+Name.substr(delim)
-                   + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.rfind(".p") == std::string::npos) {
-        // We don't have an address space qualifier so this has be upgraded
-        // to the new name.  Copy the type name at the end of the intrinsic
-        // and add to it
-        std::string::size_type delim = Name.find_last_of('.');
-        assert(delim != std::string::npos && "can not find type");
-        F->setName(Name + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-    } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
-      if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
-            Name.compare(14, 5, "vaddl", 5) == 0 ||
-            Name.compare(14, 5, "vsubl", 5) == 0 ||
-            Name.compare(14, 5, "vaddw", 5) == 0 ||
-            Name.compare(14, 5, "vsubw", 5) == 0 ||
-            Name.compare(14, 5, "vmlal", 5) == 0 ||
-            Name.compare(14, 5, "vmlsl", 5) == 0 ||
-            Name.compare(14, 5, "vabdl", 5) == 0 ||
-            Name.compare(14, 5, "vabal", 5) == 0) &&
-           (Name.compare(19, 2, "s.", 2) == 0 ||
-            Name.compare(19, 2, "u.", 2) == 0)) ||
-
-          (Name.compare(14, 4, "vaba", 4) == 0 &&
-           (Name.compare(18, 2, "s.", 2) == 0 ||
-            Name.compare(18, 2, "u.", 2) == 0)) ||
-
-          (Name.compare(14, 6, "vmovn.", 6) == 0)) {
-
-        // Calls to these are transformed into IR without intrinsics.
-        NewFn = 0;
-        return true;
-      }
-      // Old versions of NEON ld/st intrinsics are missing alignment arguments.
-      bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
-      bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
-      if (isVLd || isVSt) {
-        unsigned NumVecs = Name.at(17) - '0';
-        if (NumVecs == 0 || NumVecs > 4)
-          return false;
-        bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
-        if (!isLaneOp && Name.at(18) != '.')
-          return false;
-        unsigned ExpectedArgs = 2; // for the address and alignment
-        if (isVSt || isLaneOp)
-          ExpectedArgs += NumVecs;
-        if (isLaneOp)
-          ExpectedArgs += 1; // for the lane number
-        unsigned NumP = FTy->getNumParams();
-        if (NumP != ExpectedArgs - 1)
-          return false;
-
-        // Change the name of the old (bad) intrinsic, because 
-        // its type is incorrect, but we cannot overload that name.
-        F->setName("");
-
-        // One argument is missing: add the alignment argument.
-        std::vector<const Type*> NewParams;
-        for (unsigned p = 0; p < NumP; ++p)
-          NewParams.push_back(FTy->getParamType(p));
-        NewParams.push_back(Type::getInt32Ty(F->getContext()));
-        FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
-                                                 NewParams, false);
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
-        return true;
-      }
-    }
-    break;
-  case 'b':
-    //  This upgrades the name of the llvm.bswap intrinsic function to only use 
-    //  a single type name for overloading. We only care about the old format
-    //  'llvm.bswap.i*.i*', so check for 'bswap.' and then for there being 
-    //  a '.' after 'bswap.'
-    if (Name.compare(5,6,"bswap.",6) == 0) {
-      std::string::size_type delim = Name.find('.',11);
-      
-      if (delim != std::string::npos) {
-        //  Construct the new name as 'llvm.bswap' + '.i*'
-        F->setName(Name.substr(0,10)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-    }
-    break;
-
-  case 'c':
-    //  We only want to fix the 'llvm.ct*' intrinsics which do not have the 
-    //  correct return type, so we check for the name, and then check if the 
-    //  return type does not match the parameter type.
-    if ( (Name.compare(5,5,"ctpop",5) == 0 ||
-          Name.compare(5,4,"ctlz",4) == 0 ||
-          Name.compare(5,4,"cttz",4) == 0) &&
-        FTy->getReturnType() != FTy->getParamType(0)) {
-      //  We first need to change the name of the old (bad) intrinsic, because 
-      //  its type is incorrect, but we cannot overload that name. We 
-      //  arbitrarily unique it here allowing us to construct a correctly named 
-      //  and typed function below.
-      F->setName("");
-
-      //  Now construct the new intrinsic with the correct name and type. We 
-      //  leave the old function around in order to query its type, whatever it 
-      //  may be, and correctly convert up to the new type.
-      NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                    FTy->getParamType(0),
-                                                    FTy->getParamType(0),
-                                                    (Type *)0));
-      return true;
-    }
-    break;
-
-  case 'e':
-    //  The old llvm.eh.selector.i32 is equivalent to the new llvm.eh.selector.
-    if (Name.compare("llvm.eh.selector.i32") == 0) {
-      F->setName("llvm.eh.selector");
-      NewFn = F;
-      return true;
-    }
-    //  The old llvm.eh.typeid.for.i32 is equivalent to llvm.eh.typeid.for.
-    if (Name.compare("llvm.eh.typeid.for.i32") == 0) {
-      F->setName("llvm.eh.typeid.for");
-      NewFn = F;
-      return true;
-    }
-    //  Convert the old llvm.eh.selector.i64 to a call to llvm.eh.selector.
-    if (Name.compare("llvm.eh.selector.i64") == 0) {
-      NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_selector);
-      return true;
-    }
-    //  Convert the old llvm.eh.typeid.for.i64 to a call to llvm.eh.typeid.for.
-    if (Name.compare("llvm.eh.typeid.for.i64") == 0) {
-      NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_typeid_for);
-      return true;
-    }
-    break;
-
-  case 'm': {
-    // This upgrades the llvm.memcpy, llvm.memmove, and llvm.memset to the
-    // new format that allows overloading the pointer for different address
-    // space (e.g., llvm.memcpy.i16 => llvm.memcpy.p0i8.p0i8.i16)
-    const char* NewFnName = NULL;
-    if (Name.compare(5,8,"memcpy.i",8) == 0) {
-      if (Name[13] == '8')
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i8";
-      else if (Name.compare(13,2,"16") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i16";
-      else if (Name.compare(13,2,"32") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i32";
-      else if (Name.compare(13,2,"64") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i64";
-    } else if (Name.compare(5,9,"memmove.i",9) == 0) {
-      if (Name[14] == '8')
-        NewFnName = "llvm.memmove.p0i8.p0i8.i8";
-      else if (Name.compare(14,2,"16") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i16";
-      else if (Name.compare(14,2,"32") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i32";
-      else if (Name.compare(14,2,"64") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i64";
-    }
-    else if (Name.compare(5,8,"memset.i",8) == 0) {
-      if (Name[13] == '8')
-        NewFnName = "llvm.memset.p0i8.i8";
-      else if (Name.compare(13,2,"16") == 0)
-        NewFnName = "llvm.memset.p0i8.i16";
-      else if (Name.compare(13,2,"32") == 0)
-        NewFnName = "llvm.memset.p0i8.i32";
-      else if (Name.compare(13,2,"64") == 0)
-        NewFnName = "llvm.memset.p0i8.i64";
-    }
-    if (NewFnName) {
-      NewFn = cast<Function>(M->getOrInsertFunction(NewFnName, 
-                                            FTy->getReturnType(),
-                                            FTy->getParamType(0),
-                                            FTy->getParamType(1),
-                                            FTy->getParamType(2),
-                                            FTy->getParamType(3),
-                                            Type::getInt1Ty(F->getContext()),
-                                            (Type *)0));
-      return true;
-    }
-    break;
-  }
   case 'p':
-    //  This upgrades the llvm.part.select overloaded intrinsic names to only 
-    //  use one type specifier in the name. We only care about the old format
-    //  'llvm.part.select.i*.i*', and solve as above with bswap.
-    if (Name.compare(5,12,"part.select.",12) == 0) {
-      std::string::size_type delim = Name.find('.',17);
-      
-      if (delim != std::string::npos) {
-        //  Construct a new name as 'llvm.part.select' + '.i*'
-        F->setName(Name.substr(0,16)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-      break;
-    }
-
-    //  This upgrades the llvm.part.set intrinsics similarly as above, however 
-    //  we care about 'llvm.part.set.i*.i*.i*', but only the first two types 
-    //  must match. There is an additional type specifier after these two 
-    //  matching types that we must retain when upgrading.  Thus, we require 
-    //  finding 2 periods, not just one, after the intrinsic name.
-    if (Name.compare(5,9,"part.set.",9) == 0) {
-      std::string::size_type delim = Name.find('.',14);
-
-      if (delim != std::string::npos &&
-          Name.find('.',delim+1) != std::string::npos) {
-        //  Construct a new name as 'llvm.part.select' + '.i*.i*'
-        F->setName(Name.substr(0,13)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-      break;
-    }
-
     //  This upgrades the llvm.prefetch intrinsic to accept one more parameter,
     //  which is a instruction / data cache identifier. The old version only
     //  implicitly accepted the data version.
@@ -336,270 +95,21 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       }
     }
 
-    // This fixes all MMX shift intrinsic instructions to take a
-    // x86_mmx instead of a v1i64, v2i32, v4i16, or v8i8.
-    if (Name.compare(5, 8, "x86.mmx.", 8) == 0) {
-      const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
-
-      if (Name.compare(13, 4, "padd", 4) == 0   ||
-          Name.compare(13, 4, "psub", 4) == 0   ||
-          Name.compare(13, 4, "pmul", 4) == 0   ||
-          Name.compare(13, 5, "pmadd", 5) == 0  ||
-          Name.compare(13, 4, "pand", 4) == 0   ||
-          Name.compare(13, 3, "por", 3) == 0    ||
-          Name.compare(13, 4, "pxor", 4) == 0   ||
-          Name.compare(13, 4, "pavg", 4) == 0   ||
-          Name.compare(13, 4, "pmax", 4) == 0   ||
-          Name.compare(13, 4, "pmin", 4) == 0   ||
-          Name.compare(13, 4, "psad", 4) == 0   ||
-          Name.compare(13, 4, "psll", 4) == 0   ||
-          Name.compare(13, 4, "psrl", 4) == 0   ||
-          Name.compare(13, 4, "psra", 4) == 0   ||
-          Name.compare(13, 4, "pack", 4) == 0   ||
-          Name.compare(13, 6, "punpck", 6) == 0 ||
-          Name.compare(13, 4, "pcmp", 4) == 0) {
-        assert(FTy->getNumParams() == 2 && "MMX intrinsic takes 2 args!");
-        const Type *SecondParamTy = X86_MMXTy;
-
-        if (Name.compare(13, 5, "pslli", 5) == 0 ||
-            Name.compare(13, 5, "psrli", 5) == 0 ||
-            Name.compare(13, 5, "psrai", 5) == 0)
-          SecondParamTy = FTy->getParamType(1);
-
-        // Don't do anything if it has the correct types.
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == SecondParamTy)
-          break;
-
-        // We first need to change the name of the old (bad) intrinsic, because
-        // its type is incorrect, but we cannot overload that name. We
-        // arbitrarily unique it here allowing us to construct a correctly named
-        // and typed function below.
-        F->setName("");
-
-        // Now construct the new intrinsic with the correct name and type. We
-        // leave the old function around in order to query its type, whatever it
-        // may be, and correctly convert up to the new type.
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy, X86_MMXTy,
-                                                      SecondParamTy, (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "maskmovq", 8) == 0) {
-        // Don't do anything if it has the correct types.
-        if (FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "pmovmskb", 8) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "movnt", 5) == 0) {
-        if (FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      FTy->getParamType(0),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 7, "palignr", 7) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "pextr", 5) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "pinsr", 5) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 12, "cvtsi32.si64", 12) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(0),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 12, "cvtsi64.si32", 12) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "vec.init", 8) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy)
-          break;
-
-        F->setName("");
-
-        if (Name.compare(21, 2, ".b", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        FTy->getParamType(2),
-                                                        FTy->getParamType(3),
-                                                        FTy->getParamType(4),
-                                                        FTy->getParamType(5),
-                                                        FTy->getParamType(6),
-                                                        FTy->getParamType(7),
-                                                        (Type*)0));
-        else if (Name.compare(21, 2, ".w", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        FTy->getParamType(2),
-                                                        FTy->getParamType(3),
-                                                        (Type*)0));
-        else if (Name.compare(21, 2, ".d", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        (Type*)0));
-        return true;
-      }
-
-
-      if (Name.compare(13, 9, "vec.ext.d", 9) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 9, "emms", 4) == 0 ||
-          Name.compare(13, 9, "femms", 5) == 0) {
-        NewFn = 0;
-        break;
-      }
-
-      // We really shouldn't get here ever.
-      assert(0 && "Invalid MMX intrinsic!");
-      break;
-    } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
-               Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
-               Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
-               Name.compare(5,15,"x86.sse2.movs.d",15) == 0 ||
-               Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 ||
-               Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 ||
-               Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ||
-               Name.compare(5,20,"x86.sse2.punpckh.qdq",20) == 0 ||
-               Name.compare(5,20,"x86.sse2.punpckl.qdq",20) == 0) {
-      // Calls to these intrinsics are transformed into ShuffleVector's.
-      NewFn = 0;
-      return true;
-    } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
-      // Calls to these intrinsics are transformed into vector multiplies.
-      NewFn = 0;
-      return true;
-    } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 ||
-               Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) {
-      // Calls to these intrinsics are transformed into vector shuffles, shifts,
-      // or 0.
-      NewFn = 0;
-      return true;           
-    } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 ||
-               Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) {
+    if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 ||
+        Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 ||
+        Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) {
       // Calls to these instructions are transformed into unaligned loads.
       NewFn = 0;
       return true;
-    } else if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) {
+    }
+      
+    if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 ||
+        Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 ||
+        Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 ||
+        Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) {
       // Calls to these instructions are transformed into nontemporal stores.
       NewFn = 0;
       return true;
-    } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
-      // This is an SSE/MMX instruction.
-      const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
-      NewFn =
-        cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w",
-                                              X86_MMXTy,
-                                              X86_MMXTy,
-                                              Type::getInt8Ty(F->getContext()),
-                                              (Type*)0));
-      return true;
     }
 
     break;
@@ -625,105 +135,10 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
 }
 
 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
-  StringRef Name(GV->getName());
-
-  // We are only upgrading one symbol here.
-  if (Name == ".llvm.eh.catch.all.value") {
-    GV->setName("llvm.eh.catch.all.value");
-    return true;
-  }
-
+  // No global variables currently need upgrading; GV is left untouched.
   return false;
 }
 
-/// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results
-/// have vector elements twice as big as one or both source operands, do the
-/// sign- or zero-extension that used to be handled by intrinsics.  The
-/// extended values are returned via V0 and V1.
-static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1,
-                           Value *&V0, Value *&V1) {
-  Function *F = CI->getCalledFunction();
-  const std::string& Name = F->getName();
-  bool isLong = (Name.at(18) == 'l');
-  bool isSigned = (Name.at(19) == 's');
-
-  if (isSigned) {
-    if (isLong)
-      V0 = new SExtInst(Arg0, CI->getType(), "", CI);
-    else
-      V0 = Arg0;
-    V1 = new SExtInst(Arg1, CI->getType(), "", CI);
-  } else {
-    if (isLong)
-      V0 = new ZExtInst(Arg0, CI->getType(), "", CI);
-    else
-      V0 = Arg0;
-    V1 = new ZExtInst(Arg1, CI->getType(), "", CI);
-  }
-}
-
-/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba,
-/// or vabal intrinsics, construct a call to a vabd intrinsic.  Examine the
-/// name of the old intrinsic to determine whether to use a signed or unsigned
-/// vabd intrinsic.  Get the type from the old call instruction, adjusted for
-/// half-size vector elements if the old intrinsic was vabdl or vabal.
-static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
-  Function *F = CI->getCalledFunction();
-  const std::string& Name = F->getName();
-  bool isLong = (Name.at(18) == 'l');
-  bool isSigned = (Name.at(isLong ? 19 : 18) == 's');
-
-  Intrinsic::ID intID;
-  if (isSigned)
-    intID = Intrinsic::arm_neon_vabds;
-  else
-    intID = Intrinsic::arm_neon_vabdu;
-
-  const Type *Ty = CI->getType();
-  if (isLong)
-    Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty));
-
-  Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1);
-  Value *Operands[2];
-  Operands[0] = Arg0;
-  Operands[1] = Arg1;
-  return CallInst::Create(VABD, Operands, Operands+2, 
-                          "upgraded."+CI->getName(), CI);
-}
-
-/// ConstructNewCallInst - Construct a new CallInst with the signature of NewFn.
-static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI,
-                                 Value **Operands, unsigned NumOps,
-                                 bool AssignName = true) {
-  // Construct a new CallInst.
-  CallInst *NewCI =
-    CallInst::Create(NewFn, Operands, Operands + NumOps,
-                     AssignName ? "upgraded." + OldCI->getName() : "", OldCI);
-
-  NewCI->setTailCall(OldCI->isTailCall());
-  NewCI->setCallingConv(OldCI->getCallingConv());
-
-  // Handle any uses of the old CallInst. If the type has changed, add a cast.
-  if (!OldCI->use_empty()) {
-    if (OldCI->getType() != NewCI->getType()) {
-      Function *OldFn = OldCI->getCalledFunction();
-      CastInst *RetCast =
-        CastInst::Create(CastInst::getCastOpcode(NewCI, true,
-                                                 OldFn->getReturnType(), true),
-                         NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI);
-
-      // Replace all uses of the old call with the new cast which has the
-      // correct type.
-      OldCI->replaceAllUsesWith(RetCast);
-    } else {
-      OldCI->replaceAllUsesWith(NewCI);
-    }
-  }
-
-  // Clean up the old call now that it has been completely upgraded.
-  OldCI->eraseFromParent();
-}
-
 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 
 // upgraded intrinsic. All argument and return casting must be provided in 
 // order to seamlessly integrate with existing context.
@@ -735,284 +150,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   assert(F && "CallInst has no function associated with it.");
 
   if (!NewFn) {
-    // Get the Function's name.
-    const std::string& Name = F->getName();
-
-    // Upgrade ARM NEON intrinsics.
-    if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
-      Instruction *NewI;
-      Value *V0, *V1;
-      if (Name.compare(14, 7, "vmovls.", 7) == 0) {
-        NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
-                            "upgraded." + CI->getName(), CI);
-      } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
-        NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
-                            "upgraded." + CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vadd", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vsub", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
-      } else if (Name.compare(14, 4, "vmul", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
-      } else if (Name.compare(14, 4, "vmla", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
-        Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
-        NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vmls", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
-        Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
-        NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vabd", 4) == 0) {
-        NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
-        NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vaba", 4) == 0) {
-        NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
-        if (Name.at(18) == 'l')
-          NewI = new ZExtInst(NewI, CI->getType(), "", CI);
-        NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
-        NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
-                             "upgraded." + CI->getName(), CI);
-      } else {
-        llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
-      }
-      // Replace any uses of the old CallInst.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(NewI);
-      CI->eraseFromParent();
-      return;
-    }
-
-    bool isLoadH = false, isLoadL = false, isMovL = false;
-    bool isMovSD = false, isShufPD = false;
-    bool isUnpckhPD = false, isUnpcklPD = false;
-    bool isPunpckhQPD = false, isPunpcklQPD = false;
-    if (F->getName() == "llvm.x86.sse2.loadh.pd")
-      isLoadH = true;
-    else if (F->getName() == "llvm.x86.sse2.loadl.pd")
-      isLoadL = true;
-    else if (F->getName() == "llvm.x86.sse2.movl.dq")
-      isMovL = true;
-    else if (F->getName() == "llvm.x86.sse2.movs.d")
-      isMovSD = true;
-    else if (F->getName() == "llvm.x86.sse2.shuf.pd")
-      isShufPD = true;
-    else if (F->getName() == "llvm.x86.sse2.unpckh.pd")
-      isUnpckhPD = true;
-    else if (F->getName() == "llvm.x86.sse2.unpckl.pd")
-      isUnpcklPD = true;
-    else if (F->getName() ==  "llvm.x86.sse2.punpckh.qdq")
-      isPunpckhQPD = true;
-    else if (F->getName() ==  "llvm.x86.sse2.punpckl.qdq")
-      isPunpcklQPD = true;
-
-    if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
-        isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-      std::vector<Constant*> Idxs;
-      Value *Op0 = CI->getArgOperand(0);
-      ShuffleVectorInst *SI = NULL;
-      if (isLoadH || isLoadL) {
-        Value *Op1 = UndefValue::get(Op0->getType());
-        Value *Addr = new BitCastInst(CI->getArgOperand(1), 
-                                  Type::getDoublePtrTy(C),
-                                      "upgraded.", CI);
-        Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
-        Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0);
-        Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI);
-
-        if (isLoadH) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        } else {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-        }
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      } else if (isMovL) {
-        Constant *Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Value *ZeroV = ConstantVector::get(Idxs);
-
-        Idxs.clear(); 
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 4));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 5));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
-      } else if (isMovSD ||
-                 isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-        Value *Op1 = CI->getArgOperand(1);
-        if (isMovSD) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-        } else if (isUnpckhPD || isPunpckhQPD) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
-        } else {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        }
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      } else if (isShufPD) {
-        Value *Op1 = CI->getArgOperand(1);
-        unsigned MaskVal =
-                        cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
-                                               ((MaskVal >> 1) & 1)+2));
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      }
-
-      assert(SI && "Unexpected!");
-
-      // Handle any uses of the old CallInst.
-      if (!CI->use_empty())
-        //  Replace all uses of the old call with the new cast which has the 
-        //  correct type.
-        CI->replaceAllUsesWith(SI);
-      
-      //  Clean up the old call now that it has been completely upgraded.
-      CI->eraseFromParent();
-    } else if (F->getName() == "llvm.x86.sse41.pmulld") {
-      // Upgrade this set of intrinsics into vector multiplies.
-      Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
-                                                   CI->getArgOperand(1),
-                                                   CI->getName(),
-                                                   CI);
-      // Fix up all the uses with our new multiply.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Mul);
-        
-      // Remove upgraded multiply.
-      CI->eraseFromParent();
-    } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
-      Value *Op1 = CI->getArgOperand(0);
-      Value *Op2 = CI->getArgOperand(1);
-      Value *Op3 = CI->getArgOperand(2);
-      unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
-      Value *Rep;
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      // If palignr is shifting the pair of input vectors less than 9 bytes,
-      // emit a shuffle instruction.
-      if (shiftVal <= 8) {
-        const Type *IntTy = Type::getInt32Ty(C);
-        const Type *EltTy = Type::getInt8Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 8);
-        
-        Op2 = Builder.CreateBitCast(Op2, VecTy);
-        Op1 = Builder.CreateBitCast(Op1, VecTy);
-
-        llvm::SmallVector<llvm::Constant*, 8> Indices;
-        for (unsigned i = 0; i != 8; ++i)
-          Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
-        Value *SV = ConstantVector::get(Indices);
-        Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
-        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
-      }
-
-      // If palignr is shifting the pair of input vectors more than 8 but less
-      // than 16 bytes, emit a logical right shift of the destination.
-      else if (shiftVal < 16) {
-        // MMX has these as 1 x i64 vectors for some odd optimization reasons.
-        const Type *EltTy = Type::getInt64Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 1);
-
-        Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
-        Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8);
-
-        // create i32 constant
-        Function *I =
-          Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q);
-        Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
-      }
-
-      // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-      else {
-        Rep = Constant::getNullValue(F->getReturnType());
-      }
-      
-      // Replace any uses with our new instruction.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Rep);
-        
-      // Remove upgraded instruction.
-      CI->eraseFromParent();
-      
-    } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
-      Value *Op1 = CI->getArgOperand(0);
-      Value *Op2 = CI->getArgOperand(1);
-      Value *Op3 = CI->getArgOperand(2);
-      unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
-      Value *Rep;
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      // If palignr is shifting the pair of input vectors less than 17 bytes,
-      // emit a shuffle instruction.
-      if (shiftVal <= 16) {
-        const Type *IntTy = Type::getInt32Ty(C);
-        const Type *EltTy = Type::getInt8Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 16);
-        
-        Op2 = Builder.CreateBitCast(Op2, VecTy);
-        Op1 = Builder.CreateBitCast(Op1, VecTy);
-
-        llvm::SmallVector<llvm::Constant*, 16> Indices;
-        for (unsigned i = 0; i != 16; ++i)
-          Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
-        Value *SV = ConstantVector::get(Indices);
-        Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
-        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
-      }
-
-      // If palignr is shifting the pair of input vectors more than 16 but less
-      // than 32 bytes, emit a logical right shift of the destination.
-      else if (shiftVal < 32) {
-        const Type *EltTy = Type::getInt64Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 2);
-        const Type *IntTy = Type::getInt32Ty(C);
-
-        Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
-        Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8);
-
-        // create i32 constant
-        Function *I =
-          Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq);
-        Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
-      }
-
-      // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-      else {
-        Rep = Constant::getNullValue(F->getReturnType());
-      }
-      
-      // Replace any uses with our new instruction.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Rep);
-        
-      // Remove upgraded instruction.
-      CI->eraseFromParent();
-    
-    } else if (F->getName() == "llvm.x86.sse.loadu.ps" ||
-               F->getName() == "llvm.x86.sse2.loadu.dq" ||
-               F->getName() == "llvm.x86.sse2.loadu.pd") {
+    if (F->getName() == "llvm.x86.sse.loadu.ps" ||
+        F->getName() == "llvm.x86.sse2.loadu.dq" ||
+        F->getName() == "llvm.x86.sse2.loadu.pd") {
       // Convert to a native, unaligned load.
       const Type *VecTy = CI->getType();
       const Type *IntTy = IntegerType::get(C, 128);
@@ -1064,310 +204,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   }
 
   switch (NewFn->getIntrinsicID()) {
-  default: llvm_unreachable("Unknown function for CallInst upgrade.");
-  case Intrinsic::arm_neon_vld1:
-  case Intrinsic::arm_neon_vld2:
-  case Intrinsic::arm_neon_vld3:
-  case Intrinsic::arm_neon_vld4:
-  case Intrinsic::arm_neon_vst1:
-  case Intrinsic::arm_neon_vst2:
-  case Intrinsic::arm_neon_vst3:
-  case Intrinsic::arm_neon_vst4:
-  case Intrinsic::arm_neon_vld2lane:
-  case Intrinsic::arm_neon_vld3lane:
-  case Intrinsic::arm_neon_vld4lane:
-  case Intrinsic::arm_neon_vst2lane:
-  case Intrinsic::arm_neon_vst3lane:
-  case Intrinsic::arm_neon_vst4lane: {
-    // Add a default alignment argument of 1.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-    Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty())
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(NewCI);
-    
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
-    break;
-  }        
-
-  case Intrinsic::x86_mmx_padd_b:
-  case Intrinsic::x86_mmx_padd_w:
-  case Intrinsic::x86_mmx_padd_d:
-  case Intrinsic::x86_mmx_padd_q:
-  case Intrinsic::x86_mmx_padds_b:
-  case Intrinsic::x86_mmx_padds_w:
-  case Intrinsic::x86_mmx_paddus_b:
-  case Intrinsic::x86_mmx_paddus_w:
-  case Intrinsic::x86_mmx_psub_b:
-  case Intrinsic::x86_mmx_psub_w:
-  case Intrinsic::x86_mmx_psub_d:
-  case Intrinsic::x86_mmx_psub_q:
-  case Intrinsic::x86_mmx_psubs_b:
-  case Intrinsic::x86_mmx_psubs_w:
-  case Intrinsic::x86_mmx_psubus_b:
-  case Intrinsic::x86_mmx_psubus_w:
-  case Intrinsic::x86_mmx_pmulh_w:
-  case Intrinsic::x86_mmx_pmull_w:
-  case Intrinsic::x86_mmx_pmulhu_w:
-  case Intrinsic::x86_mmx_pmulu_dq:
-  case Intrinsic::x86_mmx_pmadd_wd:
-  case Intrinsic::x86_mmx_pand:
-  case Intrinsic::x86_mmx_pandn:
-  case Intrinsic::x86_mmx_por:
-  case Intrinsic::x86_mmx_pxor:
-  case Intrinsic::x86_mmx_pavg_b:
-  case Intrinsic::x86_mmx_pavg_w:
-  case Intrinsic::x86_mmx_pmaxu_b:
-  case Intrinsic::x86_mmx_pmaxs_w:
-  case Intrinsic::x86_mmx_pminu_b:
-  case Intrinsic::x86_mmx_pmins_w:
-  case Intrinsic::x86_mmx_psad_bw:
-  case Intrinsic::x86_mmx_psll_w:
-  case Intrinsic::x86_mmx_psll_d:
-  case Intrinsic::x86_mmx_psll_q:
-  case Intrinsic::x86_mmx_pslli_w:
-  case Intrinsic::x86_mmx_pslli_d:
-  case Intrinsic::x86_mmx_pslli_q:
-  case Intrinsic::x86_mmx_psrl_w:
-  case Intrinsic::x86_mmx_psrl_d:
-  case Intrinsic::x86_mmx_psrl_q:
-  case Intrinsic::x86_mmx_psrli_w:
-  case Intrinsic::x86_mmx_psrli_d:
-  case Intrinsic::x86_mmx_psrli_q:
-  case Intrinsic::x86_mmx_psra_w:
-  case Intrinsic::x86_mmx_psra_d:
-  case Intrinsic::x86_mmx_psrai_w:
-  case Intrinsic::x86_mmx_psrai_d:
-  case Intrinsic::x86_mmx_packsswb:
-  case Intrinsic::x86_mmx_packssdw:
-  case Intrinsic::x86_mmx_packuswb:
-  case Intrinsic::x86_mmx_punpckhbw:
-  case Intrinsic::x86_mmx_punpckhwd:
-  case Intrinsic::x86_mmx_punpckhdq:
-  case Intrinsic::x86_mmx_punpcklbw:
-  case Intrinsic::x86_mmx_punpcklwd:
-  case Intrinsic::x86_mmx_punpckldq:
-  case Intrinsic::x86_mmx_pcmpeq_b:
-  case Intrinsic::x86_mmx_pcmpeq_w:
-  case Intrinsic::x86_mmx_pcmpeq_d:
-  case Intrinsic::x86_mmx_pcmpgt_b:
-  case Intrinsic::x86_mmx_pcmpgt_w:
-  case Intrinsic::x86_mmx_pcmpgt_d: {
-    Value *Operands[2];
-    
-    // Cast the operand to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-
-    switch (NewFn->getIntrinsicID()) {
-    default:
-      // Cast to the X86 MMX type.
-      Operands[1] = new BitCastInst(CI->getArgOperand(1), 
-                                    NewFn->getFunctionType()->getParamType(1),
-                                    "upgraded.", CI);
-      break;
-    case Intrinsic::x86_mmx_pslli_w:
-    case Intrinsic::x86_mmx_pslli_d:
-    case Intrinsic::x86_mmx_pslli_q:
-    case Intrinsic::x86_mmx_psrli_w:
-    case Intrinsic::x86_mmx_psrli_d:
-    case Intrinsic::x86_mmx_psrli_q:
-    case Intrinsic::x86_mmx_psrai_w:
-    case Intrinsic::x86_mmx_psrai_d:
-      // These take an i32 as their second parameter.
-      Operands[1] = CI->getArgOperand(1);
-      break;
-    }
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-  case Intrinsic::x86_mmx_maskmovq: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = new BitCastInst(CI->getArgOperand(1), 
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3, false);
-    break;
-  }
-  case Intrinsic::x86_mmx_pmovmskb: {
-    Value *Operands[1];
-
-    // Cast the operand to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 1);
-    break;
-  }
-  case Intrinsic::x86_mmx_movnt_dq: {
-    Value *Operands[2];
-
-    Operands[0] = CI->getArgOperand(0);
-
-    // Cast the operand to the X86 MMX type.
-    Operands[1] = new BitCastInst(CI->getArgOperand(1),
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2, false);
-    break;
-  }
-  case Intrinsic::x86_mmx_palignr_b: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = new BitCastInst(CI->getArgOperand(1),
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3);
-    break;
-  }
-  case Intrinsic::x86_mmx_pextr_w: {
-    Value *Operands[2];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = CI->getArgOperand(1);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-  case Intrinsic::x86_mmx_pinsr_w: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = CI->getArgOperand(1);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3);
-    break;
-  }
-  case Intrinsic::x86_sse_pshuf_w: {
-    IRBuilder<> Builder(C);
-    Builder.SetInsertPoint(CI->getParent(), CI);
-
-    // Cast the operand to the X86 MMX type.
-    Value *Operands[2];
-    Operands[0] =
-      Builder.CreateBitCast(CI->getArgOperand(0), 
-                            NewFn->getFunctionType()->getParamType(0),
-                            "upgraded.");
-    Operands[1] =
-      Builder.CreateTrunc(CI->getArgOperand(1),
-                          Type::getInt8Ty(C),
-                          "upgraded.");
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-
-  case Intrinsic::ctlz:
-  case Intrinsic::ctpop:
-  case Intrinsic::cttz: {
-    //  Build a small vector of the original arguments.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-
-    //  Construct a new CallInst
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       "upgraded."+CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty()) {
-      //  Check for sign extend parameter attributes on the return values.
-      bool SrcSExt = NewFn->getAttributes().paramHasAttr(0, Attribute::SExt);
-      bool DestSExt = F->getAttributes().paramHasAttr(0, Attribute::SExt);
-      
-      //  Construct an appropriate cast from the new return type to the old.
-      CastInst *RetCast = CastInst::Create(
-                            CastInst::getCastOpcode(NewCI, SrcSExt,
-                                                    F->getReturnType(),
-                                                    DestSExt),
-                            NewCI, F->getReturnType(),
-                            NewCI->getName(), CI);
-      NewCI->moveBefore(RetCast);
-
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(RetCast);
-    }
-
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
-  }
-  break;
-  case Intrinsic::eh_selector:
-  case Intrinsic::eh_typeid_for: {
-    // Only the return type changed.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       "upgraded." + CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty()) {
-      //  Construct an appropriate cast from the new return type to the old.
-      CastInst *RetCast =
-        CastInst::Create(CastInst::getCastOpcode(NewCI, true,
-                                                 F->getReturnType(), true),
-                         NewCI, F->getReturnType(), NewCI->getName(), CI);
-      CI->replaceAllUsesWith(RetCast);
-    }
-    CI->eraseFromParent();
-  }
-  break;
-  case Intrinsic::memcpy:
-  case Intrinsic::memmove:
-  case Intrinsic::memset: {
-    // Add isVolatile
-    const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
-    Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
-                           CI->getArgOperand(2), CI->getArgOperand(3),
-                           llvm::ConstantInt::get(I1Ty, 0) };
-    CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
-                                       CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty())
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(NewCI);
-    
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
-    break;
-  }
   case Intrinsic::prefetch: {
     IRBuilder<> Builder(C);
     Builder.SetInsertPoint(CI->getParent(), CI);
@@ -1401,13 +237,13 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
 
   // Upgrade the function and check if it is a totaly new function.
-  Function* NewFn;
+  Function *NewFn;
   if (UpgradeIntrinsicFunction(F, NewFn)) {
     if (NewFn != F) {
       // Replace all uses to the old function with the new one if necessary.
       for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
            UI != UE; ) {
-        if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+        if (CallInst *CI = dyn_cast<CallInst>(*UI++))
           UpgradeIntrinsicCall(CI, NewFn);
       }
       // Remove old function, no longer used, from the module.
@@ -1420,37 +256,27 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
 /// If an llvm.dbg.declare intrinsic is invalid, then this function simply
 /// strips that use.
 void llvm::CheckDebugInfoIntrinsics(Module *M) {
-
-
   if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
-    while (!FuncStart->use_empty()) {
-      CallInst *CI = cast<CallInst>(FuncStart->use_back());
-      CI->eraseFromParent();
-    }
+    while (!FuncStart->use_empty())
+      cast<CallInst>(FuncStart->use_back())->eraseFromParent();
     FuncStart->eraseFromParent();
   }
   
   if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
-    while (!StopPoint->use_empty()) {
-      CallInst *CI = cast<CallInst>(StopPoint->use_back());
-      CI->eraseFromParent();
-    }
+    while (!StopPoint->use_empty())
+      cast<CallInst>(StopPoint->use_back())->eraseFromParent();
     StopPoint->eraseFromParent();
   }
 
   if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
-    while (!RegionStart->use_empty()) {
-      CallInst *CI = cast<CallInst>(RegionStart->use_back());
-      CI->eraseFromParent();
-    }
+    while (!RegionStart->use_empty())
+      cast<CallInst>(RegionStart->use_back())->eraseFromParent();
     RegionStart->eraseFromParent();
   }
 
   if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
-    while (!RegionEnd->use_empty()) {
-      CallInst *CI = cast<CallInst>(RegionEnd->use_back());
-      CI->eraseFromParent();
-    }
+    while (!RegionEnd->use_empty())
+      cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
     RegionEnd->eraseFromParent();
   }
   
diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
index 062ea59..ebd349a 100644
--- a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
+++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
@@ -15,12 +15,12 @@ define void @test0() {
 ; CHECK: NoModRef:   call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <->   call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
 ; CHECK: NoModRef:   call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <->   call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
 
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 @A = external global i8
 @B = external global i8
 define void @test1() {
-  call void @llvm.memset.i64(i8* @A, i8 0, i64 1, i32 1)
-  call void @llvm.memset.i64(i8* @B, i8 0, i64 1, i32 1)
+  call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
   ret void
 }
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index 7318a89..233396b 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -1,10 +1,6 @@
 ; RUN: opt < %s -basicaa -gvn -dse -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
-declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
 
 declare void @external(i32*) 
@@ -15,7 +11,7 @@ define i32 @test0(i8* %P) {
   
   store i32 0, i32* %A
   
-  call void @llvm.memset.i32(i8* %P, i8 0, i32 42, i32 1)
+  call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false)
   
   %B = load i32* %A
   ret i32 %B
@@ -31,7 +27,7 @@ define i8 @test1() {
 
   store i8 2, i8* %B  ;; Not written to by memcpy
 
-  call void @llvm.memcpy.i8(i8* %A, i8* %B, i8 -1, i32 0)
+  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
   %C = load i8* %B
   ret i8 %C
@@ -42,7 +38,7 @@ define i8 @test2(i8* %P) {
 ; CHECK: @test2
   %P2 = getelementptr i8* %P, i32 127
   store i8 1, i8* %P2  ;; Not dead across memset
-  call void @llvm.memset.i8(i8* %P, i8 2, i8 127, i32 0)
+  call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
   %A = load i8* %P2
   ret i8 %A
 ; CHECK: ret i8 1
@@ -55,7 +51,7 @@ define i8 @test2a(i8* %P) {
   ;; FIXME: DSE isn't zapping this dead store.
   store i8 1, i8* %P2  ;; Dead, clobbered by memset.
   
-  call void @llvm.memset.i8(i8* %P, i8 2, i8 127, i32 0)
+  call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
   %A = load i8* %P2
   ret i8 %A
 ; CHECK-NOT: load
@@ -95,7 +91,7 @@ define void @test3a(i8* %P, i8 %X) {
 
 define i32 @test4(i8* %P) {
   %tmp = load i32* @G1
-  call void @llvm.memset.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1)
+  call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1, i1 false)
   %tmp2 = load i32* @G1
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
@@ -110,7 +106,7 @@ define i32 @test4(i8* %P) {
 ; write to G1.
 define i32 @test5(i8* %P, i32 %Len) {
   %tmp = load i32* @G1
-  call void @llvm.memcpy.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1, i1 false)
   %tmp2 = load i32* @G1
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
@@ -132,3 +128,9 @@ define i8 @test6(i8* %p, i8* noalias %a) {
 ; CHECK-NOT: load
 ; CHECK: ret
 }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
index 89e8b98..474d564 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
@@ -173,7 +173,7 @@ bb23:		; preds = %bb24, %bb.nph
 	%55 = mul i32 %y.21, %w		; <i32> [#uses=1]
 	%.sum5 = add i32 %55, %.sum3		; <i32> [#uses=1]
 	%56 = getelementptr i8* %j, i32 %.sum5		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %56, i8* %54, i32 %w, i32 1)
+	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i32 1, i1 false)
 	%57 = add i32 %y.21, 1		; <i32> [#uses=2]
 	br label %bb24
 
@@ -190,7 +190,7 @@ bb26:		; preds = %bb24.bb26_crit_edge, %bb22
 	%60 = getelementptr i8* %j, i32 %.sum4		; <i8*> [#uses=1]
 	%61 = mul i32 %x, %w		; <i32> [#uses=1]
 	%62 = sdiv i32 %61, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %60, i8 -128, i32 %62, i32 1)
+	tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i32 1, i1 false)
 	ret void
 
 bb29:		; preds = %bb20, %entry
@@ -208,7 +208,7 @@ bb30:		; preds = %bb31, %bb.nph11
 	%67 = getelementptr i8* %r, i32 %66		; <i8*> [#uses=1]
 	%68 = mul i32 %y.310, %w		; <i32> [#uses=1]
 	%69 = getelementptr i8* %j, i32 %68		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %69, i8* %67, i32 %w, i32 1)
+	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i32 1, i1 false)
 	%70 = add i32 %y.310, 1		; <i32> [#uses=2]
 	br label %bb31
 
@@ -224,13 +224,12 @@ bb33:		; preds = %bb31.bb33_crit_edge, %bb29
 	%73 = getelementptr i8* %j, i32 %72		; <i8*> [#uses=1]
 	%74 = mul i32 %x, %w		; <i32> [#uses=1]
 	%75 = sdiv i32 %74, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %73, i8 -128, i32 %75, i32 1)
+	tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i32 1, i1 false)
 	ret void
 
 return:		; preds = %bb20
 	ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
index 10b798b..1bf86ae 100644
--- a/test/Analysis/ScalarEvolution/trip-count3.ll
+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
@@ -5,15 +5,14 @@
 ; dividing by the stride will have a remainder. This could theoretically
 ; be teaching it how to use a more elaborate trip count computation.
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
-	%struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
-@_2E_str = external constant [26 x i8]		; <[26 x i8]*> [#uses=0]
-@stdin = external global %struct.FILE*		; <%struct.FILE**> [#uses=0]
-@_2E_str1 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
-@_2E_str12 = external constant [30 x i8]		; <[30 x i8]*> [#uses=0]
+%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+
+@_2E_str = external constant [26 x i8]
+@stdin = external global %struct.FILE*
+@_2E_str1 = external constant [3 x i8]
+@_2E_str12 = external constant [30 x i8]
 
 declare void @sha_init(%struct.SHA_INFO* nocapture) nounwind
 
@@ -25,12 +24,8 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 declare void @sha_final(%struct.SHA_INFO* nocapture) nounwind
 
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
 declare void @sha_update(%struct.SHA_INFO* nocapture, i8* nocapture, i32) nounwind
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
 declare i64 @fread(i8* noalias nocapture, i64, i64, %struct.FILE* noalias nocapture) nounwind
 
 declare i32 @main(i32, i8** nocapture) nounwind
@@ -43,36 +38,41 @@ declare void @sha_stream(%struct.SHA_INFO* nocapture, %struct.FILE* nocapture) n
 
 define void @sha_stream_bb3_2E_i(%struct.SHA_INFO* %sha_info, i8* %data1, i32, i8** %buffer_addr.0.i.out, i32* %count_addr.0.i.out) nounwind {
 newFuncRoot:
-	br label %bb3.i
-
-sha_update.exit.exitStub:		; preds = %bb3.i
-	store i8* %buffer_addr.0.i, i8** %buffer_addr.0.i.out
-	store i32 %count_addr.0.i, i32* %count_addr.0.i.out
-	ret void
-
-bb2.i:		; preds = %bb3.i
-	%1 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3		; <[16 x i32]*> [#uses=1]
-	%2 = bitcast [16 x i32]* %1 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1) nounwind
-	%3 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0		; <i32*> [#uses=1]
-	%4 = bitcast i32* %3 to i8*		; <i8*> [#uses=1]
-	br label %codeRepl
-
-codeRepl:		; preds = %bb2.i
-	call void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8* %4)
-	br label %byte_reverse.exit.i
-
-byte_reverse.exit.i:		; preds = %codeRepl
-	call fastcc void @sha_transform(%struct.SHA_INFO* %sha_info) nounwind
-	%5 = getelementptr i8* %buffer_addr.0.i, i64 64		; <i8*> [#uses=1]
-	%6 = add i32 %count_addr.0.i, -64		; <i32> [#uses=1]
-	br label %bb3.i
-
-bb3.i:		; preds = %byte_reverse.exit.i, %newFuncRoot
-	%buffer_addr.0.i = phi i8* [ %data1, %newFuncRoot ], [ %5, %byte_reverse.exit.i ]		; <i8*> [#uses=3]
-	%count_addr.0.i = phi i32 [ %0, %newFuncRoot ], [ %6, %byte_reverse.exit.i ]		; <i32> [#uses=3]
-	%7 = icmp sgt i32 %count_addr.0.i, 63		; <i1> [#uses=1]
-	br i1 %7, label %bb2.i, label %sha_update.exit.exitStub
+  br label %bb3.i
+
+sha_update.exit.exitStub:                         ; preds = %bb3.i
+  store i8* %buffer_addr.0.i, i8** %buffer_addr.0.i.out
+  store i32 %count_addr.0.i, i32* %count_addr.0.i.out
+  ret void
+
+bb2.i:                                            ; preds = %bb3.i
+  %1 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3
+  %2 = bitcast [16 x i32]* %1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1, i1 false)
+  %3 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0
+  %4 = bitcast i32* %3 to i8*
+  br label %codeRepl
+
+codeRepl:                                         ; preds = %bb2.i
+  call void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8* %4)
+  br label %byte_reverse.exit.i
+
+byte_reverse.exit.i:                              ; preds = %codeRepl
+  call fastcc void @sha_transform(%struct.SHA_INFO* %sha_info) nounwind
+  %5 = getelementptr i8* %buffer_addr.0.i, i64 64
+  %6 = add i32 %count_addr.0.i, -64
+  br label %bb3.i
+
+bb3.i:                                            ; preds = %byte_reverse.exit.i, %newFuncRoot
+  %buffer_addr.0.i = phi i8* [ %data1, %newFuncRoot ], [ %5, %byte_reverse.exit.i ]
+  %count_addr.0.i = phi i32 [ %0, %newFuncRoot ], [ %6, %byte_reverse.exit.i ]
+  %7 = icmp sgt i32 %count_addr.0.i, 63
+  br i1 %7, label %bb2.i, label %sha_update.exit.exitStub
 }
 
 declare void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8*) nounwind
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index 20beb49..eb4ac76 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -1,87 +1,6 @@
 ; Tests to make sure intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis | not grep {i32 @llvm\\.ct}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {llvm\\.part\\.set\\.i\[0-9\]*\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   not grep {llvm\\.x86\\.sse2\\.loadu}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN:   grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 
-declare i32 @llvm.ctpop.i28(i28 %val)
-declare i32 @llvm.cttz.i29(i29 %val)
-declare i32 @llvm.ctlz.i30(i30 %val)
-
-define i32 @test_ct(i32 %A) {
-  %c1 = call i32 @llvm.ctpop.i28(i28 1234)
-  %c2 = call i32 @llvm.cttz.i29(i29 2345)
-  %c3 = call i32 @llvm.ctlz.i30(i30 3456)
-  %r1 = add i32 %c1, %c2
-  %r2 = add i32 %r1, %c3
-  ret i32 %r2
-}
-
-declare i32 @llvm.part.set.i32.i32.i32(i32 %x, i32 %rep, i32 %hi, i32 %lo)
-declare i16 @llvm.part.set.i16.i16.i16(i16 %x, i16 %rep, i32 %hi, i32 %lo)
-define i32 @test_part_set(i32 %A, i16 %B) {
-  %a = call i32 @llvm.part.set.i32.i32.i32(i32 %A, i32 27, i32 8, i32 0)
-  %b = call i16 @llvm.part.set.i16.i16.i16(i16 %B, i16 27, i32 8, i32 0)
-  %c = zext i16 %b to i32
-  %d = add i32 %a, %c
-  ret i32 %d
-}
-
-declare i32 @llvm.part.select.i32.i32(i32 %x, i32 %hi, i32 %lo)
-declare i16 @llvm.part.select.i16.i16(i16 %x, i32 %hi, i32 %lo)
-define i32 @test_part_select(i32 %A, i16 %B) {
-  %a = call i32 @llvm.part.select.i32.i32(i32 %A, i32 8, i32 0)
-  %b = call i16 @llvm.part.select.i16.i16(i16 %B, i32 8, i32 0)
-  %c = zext i16 %b to i32
-  %d = add i32 %a, %c
-  ret i32 %d
-}
-
-declare i32 @llvm.bswap.i32.i32(i32 %x)
-declare i16 @llvm.bswap.i16.i16(i16 %x)
-define i32 @test_bswap(i32 %A, i16 %B) {
-  %a = call i32 @llvm.bswap.i32.i32(i32 %A)
-  %b = call i16 @llvm.bswap.i16.i16(i16 %B)
-  %c = zext i16 %b to i32
-  %d = add i32 %a, %c
-  ret i32 %d
-}
-
-declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <2 x i32>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <2 x i32>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <2 x i32>) nounwind readnone 
-define void @sh16(<4 x i16> %A, <2 x i32> %B) {
-	%r1 = call <4 x i16> @llvm.x86.mmx.psra.w( <4 x i16> %A, <2 x i32> %B )		; <<4 x i16>> [#uses=0]
-	%r2 = call <4 x i16> @llvm.x86.mmx.psll.w( <4 x i16> %A, <2 x i32> %B )		; <<4 x i16>> [#uses=0]
-	%r3 = call <4 x i16> @llvm.x86.mmx.psrl.w( <4 x i16> %A, <2 x i32> %B )		; <<4 x i16>> [#uses=0]
-	ret void
-}
-
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <2 x i32>) nounwind readnone 
-define void @sh32(<2 x i32> %A, <2 x i32> %B) {
-	%r1 = call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %A, <2 x i32> %B )		; <<2 x i32>> [#uses=0]
-	%r2 = call <2 x i32> @llvm.x86.mmx.psll.d( <2 x i32> %A, <2 x i32> %B )		; <<2 x i32>> [#uses=0]
-	%r3 = call <2 x i32> @llvm.x86.mmx.psrl.d( <2 x i32> %A, <2 x i32> %B )		; <<2 x i32>> [#uses=0]
-	ret void
-}
-
-declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <2 x i32>) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <2 x i32>) nounwind readnone 
-define void @sh64(<1 x i64> %A, <2 x i32> %B) {
-	%r1 = call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %A, <2 x i32> %B )		; <<1 x i64>> [#uses=0]
-	%r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B )		; <<1 x i64>> [#uses=0]
-	ret void
-}
 
 declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone
 declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone
@@ -90,6 +9,10 @@ define void @test_loadu(i8* %a, double* %b) {
   %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a)
   %v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a)
   %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
+
+; CHECK: load i128* {{.*}}, align 1
+; CHECK: load i128* {{.*}}, align 1
+; CHECK: load i128* {{.*}}, align 1
   ret void
 }
 
diff --git a/test/Assembler/AutoUpgradeMMXIntrinsics.ll b/test/Assembler/AutoUpgradeMMXIntrinsics.ll
deleted file mode 100644
index 54120ff..0000000
--- a/test/Assembler/AutoUpgradeMMXIntrinsics.ll
+++ /dev/null
@@ -1,223 +0,0 @@
-; Tests to make sure MMX intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis -o %t
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<1 x i64\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<2 x i32\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<4 x i16\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<8 x i8\\\>}
-; RUN: grep {llvm\\.x86\\.sse\\.pshuf\\.w} %t | not grep i32
-
-; Addition
-declare <8 x i8>  @llvm.x86.mmx.padd.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone
-declare <8 x i8>  @llvm.x86.mmx.padds.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i8>  @llvm.x86.mmx.paddus.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @add(<8 x i8> %A,  <8 x i8> %B,  <4 x i16> %C, <4 x i16> %D,
-                 <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.padd.b(<8 x i8> %A,  <8 x i8> %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16> %C, <4 x i16> %D)
-  %r3 = call <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32> %E, <2 x i32> %F)
-  %r4 = call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %G, <1 x i64> %H)
-  %r5 = call <8 x i8>  @llvm.x86.mmx.padds.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r6 = call <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16> %C, <4 x i16> %D)
-  %r7 = call <8 x i8>  @llvm.x86.mmx.paddus.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r8 = call <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Subtraction
-declare <8 x i8>  @llvm.x86.mmx.psub.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone
-declare <8 x i8>  @llvm.x86.mmx.psubs.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i8>  @llvm.x86.mmx.psubus.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @sub(<8 x i8> %A,  <8 x i8> %B,  <4 x i16> %C, <4 x i16> %D,
-                 <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.psub.b(<8 x i8> %A,  <8 x i8> %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16> %C, <4 x i16> %D)
-  %r3 = call <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32> %E, <2 x i32> %F)
-  %r4 = call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %G, <1 x i64> %H)
-  %r5 = call <8 x i8>  @llvm.x86.mmx.psubs.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r6 = call <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16> %C, <4 x i16> %D)
-  %r7 = call <8 x i8>  @llvm.x86.mmx.psubus.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r8 = call <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Multiplication
-declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>) nounwind readnone
-define void @mul(<4 x i16> %A, <4 x i16> %B) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16> %A, <4 x i16> %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16> %A, <4 x i16> %B)
-  %r3 = call <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16> %A, <4 x i16> %B)
-  %r4 = call <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16> %A, <4 x i16> %B)
-  %r5 = call <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16> %A, <4 x i16> %B)
-  ret void
-}
-
-; Bitwise operations
-declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>)  nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>)   nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>)  nounwind readnone
-define void @bit(<1 x i64> %A, <1 x i64> %B) {
-  %r1 = call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %A, <1 x i64> %B)
-  %r2 = call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %A, <1 x i64> %B)
-  %r3 = call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %A, <1 x i64> %B)
-  %r4 = call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %A, <1 x i64> %B)
-  ret void
-}
-
-; Averages
-declare <8 x i8>  @llvm.x86.mmx.pavg.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @avg(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.pavg.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Maximum
-declare <8 x i8>  @llvm.x86.mmx.pmaxu.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @max(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.pmaxu.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Minimum
-declare <8 x i8>  @llvm.x86.mmx.pminu.b(<8 x i8>,  <8 x i8>)  nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @min(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.pminu.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16> %C, <4 x i16> %D)
-  ret void
-}
-
-; Packed sum of absolute differences
-declare <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8>, <8 x i8>) nounwind readnone
-define void @psad(<8 x i8> %A, <8 x i8> %B) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8> %A, <8 x i8> %B)
-  ret void
-}
-
-; Shift left
-declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <1 x i64>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <1 x i64>) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16>, i32) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32>, i32) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone 
-define void @shl(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16> %A, <1 x i64> %C)
-  %r2 = call <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32> %B, <1 x i64> %C)
-  %r3 = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %C, <1 x i64> %C)
-  %r4 = call <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16> %A, i32 %D)
-  %r5 = call <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32> %B, i32 %D)
-  %r6 = call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %C, i32 %D)
-  ret void
-}
-
-; Shift right logical
-declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <1 x i64>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <1 x i64>) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32>, i32) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone 
-define void @shr(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16> %A, <1 x i64> %C)
-  %r2 = call <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32> %B, <1 x i64> %C)
-  %r3 = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %C, <1 x i64> %C)
-  %r4 = call <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16> %A, i32 %D)
-  %r5 = call <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32> %B, i32 %D)
-  %r6 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %C, i32 %D)
-  ret void
-}
-
-; Shift right arithmetic
-declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <1 x i64>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <1 x i64>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16>, i32) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32>, i32) nounwind readnone 
-define void @sra(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
-  %r1 = call <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16> %A, <1 x i64> %C)
-  %r2 = call <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32> %B, <1 x i64> %C)
-  %r3 = call <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16> %A, i32 %D)
-  %r4 = call <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32> %B, i32 %D)
-  ret void
-}
-
-; Pack/Unpack ops
-declare <8 x i8>  @llvm.x86.mmx.packsswb(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <8 x i8>  @llvm.x86.mmx.packuswb(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <8 x i8>  @llvm.x86.mmx.punpckhbw(<8 x i8>, <8 x i8>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <8 x i8>  @llvm.x86.mmx.punpcklbw(<8 x i8>, <8 x i8>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32>, <2 x i32>) nounwind readnone 
-define void @pack_unpack(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
-                         <2 x i32> %E, <2 x i32> %F) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.packsswb(<4 x i16> %C, <4 x i16> %D)
-  %r2 = call <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32> %E, <2 x i32> %F)
-  %r3 = call <8 x i8>  @llvm.x86.mmx.packuswb(<4 x i16> %C, <4 x i16> %D)
-  %r4 = call <8 x i8>  @llvm.x86.mmx.punpckhbw(<8 x i8>  %A, <8 x i8>  %B)
-  %r5 = call <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16> %C, <4 x i16> %D)
-  %r6 = call <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32> %E, <2 x i32> %F)
-  %r7 = call <8 x i8>  @llvm.x86.mmx.punpcklbw(<8 x i8>  %A, <8 x i8>  %B)
-  %r8 = call <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16> %C, <4 x i16> %D)
-  %r9 = call <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32> %E, <2 x i32> %F)
-  ret void
-}
-
-; Integer comparison ops
-declare <8 x i8>  @llvm.x86.mmx.pcmpeq.b(<8 x i8>, <8 x i8>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32>, <2 x i32>) nounwind readnone 
-declare <8 x i8>  @llvm.x86.mmx.pcmpgt.b(<8 x i8>, <8 x i8>) nounwind readnone 
-declare <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16>, <4 x i16>) nounwind readnone 
-declare <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32>, <2 x i32>) nounwind readnone 
-define void @cmp(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
-                 <2 x i32> %E, <2 x i32> %F) {
-  %r1 = call <8 x i8>  @llvm.x86.mmx.pcmpeq.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r2 = call <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16> %C, <4 x i16> %D)
-  %r3 = call <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32> %E, <2 x i32> %F)
-  %r4 = call <8 x i8>  @llvm.x86.mmx.pcmpgt.b(<8 x i8>  %A, <8 x i8>  %B)
-  %r5 = call <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16> %C, <4 x i16> %D)
-  %r6 = call <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32> %E, <2 x i32> %F)
-  ret void
-}
-
-; Miscellaneous
-declare void      @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i32*) nounwind readnone 
-declare i32       @llvm.x86.mmx.pmovmskb(<8 x i8>) nounwind readnone 
-declare void      @llvm.x86.mmx.movnt.dq(i32*, <1 x i64>) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>,  i8) nounwind readnone 
-declare i32       @llvm.x86.mmx.pextr.w(<1 x i64>, i32) nounwind readnone 
-declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32) nounwind readnone 
-declare <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16>, i32) nounwind readnone 
-define void @misc(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
-                  <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H,
-                  i32* %I, i8 %J, i16 %K, i32 %L) {
-        call void      @llvm.x86.mmx.maskmovq(<8 x i8> %A, <8 x i8> %B, i32* %I)
-  %r1 = call i32       @llvm.x86.mmx.pmovmskb(<8 x i8> %A)
-        call void      @llvm.x86.mmx.movnt.dq(i32* %I, <1 x i64> %G)
-  %r2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %G, <1 x i64> %H, i8 %J)
-  %r3 = call i32       @llvm.x86.mmx.pextr.w(<1 x i64> %G, i32 37)
-  %r4 = call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %G, i32 37, i32 927)
-  %r5 = call <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16> %C, i32 37)
-  ret void
-}
diff --git a/test/Bitcode/memcpy.ll b/test/Bitcode/memcpy.ll
deleted file mode 100644
index 299eb1e..0000000
--- a/test/Bitcode/memcpy.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-define void @test(i32* %P, i32* %Q) {
-entry:
-        %tmp.1 = bitcast i32* %P to i8*         ; <i8*> [#uses=3]
-        %tmp.3 = bitcast i32* %Q to i8*         ; <i8*> [#uses=4]
-        tail call void @llvm.memcpy.i32( i8* %tmp.1, i8* %tmp.3, i32 100000, i32 1 )
-        tail call void @llvm.memcpy.i64( i8* %tmp.1, i8* %tmp.3, i64 100000, i32 1 )
-        tail call void @llvm.memset.i32( i8* %tmp.3, i8 14, i32 10000, i32 0 )
-        tail call void @llvm.memmove.i32( i8* %tmp.1, i8* %tmp.3, i32 123124, i32 1 )
-        tail call void @llvm.memmove.i64( i8* %tmp.1, i8* %tmp.3, i64 123124, i32 1 )
-        ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memmove.i64(i8*, i8*, i64, i32)
diff --git a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
deleted file mode 100644
index 26864f1..0000000
--- a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s | not grep 1_0
-; This used to create an extra branch to 'entry', LBB1_0.
-
-; ModuleID = 'bug.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-apple-darwin8"
-	%struct.HexxagonMove = type { i8, i8, i32 }
-	%struct.HexxagonMoveList = type { i32, %struct.HexxagonMove* }
-
-define void @_ZN16HexxagonMoveList8sortListEv(%struct.HexxagonMoveList* %this) {
-entry:
-	%tmp51 = getelementptr %struct.HexxagonMoveList* %this, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp2 = getelementptr %struct.HexxagonMoveList* %this, i32 0, i32 1		; <%struct.HexxagonMove**> [#uses=2]
-	br label %bb49
-
-bb1:		; preds = %bb49
-	%tmp3 = load %struct.HexxagonMove** %tmp2		; <%struct.HexxagonMove*> [#uses=5]
-	%tmp6 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 2		; <i32*> [#uses=1]
-	%tmp7 = load i32* %tmp6		; <i32> [#uses=2]
-	%tmp12 = add i32 %i.1, 1		; <i32> [#uses=7]
-	%tmp14 = getelementptr %struct.HexxagonMove* %tmp3, i32 %tmp12, i32 2		; <i32*> [#uses=1]
-	%tmp15 = load i32* %tmp14		; <i32> [#uses=1]
-	%tmp16 = icmp slt i32 %tmp7, %tmp15		; <i1> [#uses=1]
-	br i1 %tmp16, label %cond_true, label %bb49
-
-cond_true:		; preds = %bb1
-	%tmp23.0 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 0		; <i8*> [#uses=2]
-	%tmp67 = load i8* %tmp23.0		; <i8> [#uses=1]
-	%tmp23.1 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 1		; <i8*> [#uses=1]
-	%tmp68 = load i8* %tmp23.1		; <i8> [#uses=1]
-	%tmp3638 = getelementptr %struct.HexxagonMove* %tmp3, i32 %tmp12, i32 0		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32( i8* %tmp23.0, i8* %tmp3638, i32 8, i32 4 )
-	%tmp41 = load %struct.HexxagonMove** %tmp2		; <%struct.HexxagonMove*> [#uses=3]
-	%tmp44.0 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 0		; <i8*> [#uses=1]
-	store i8 %tmp67, i8* %tmp44.0
-	%tmp44.1 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 1		; <i8*> [#uses=1]
-	store i8 %tmp68, i8* %tmp44.1
-	%tmp44.2 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 2		; <i32*> [#uses=1]
-	store i32 %tmp7, i32* %tmp44.2
-	br label %bb49
-
-bb49:		; preds = %bb59, %cond_true, %bb1, %entry
-	%i.1 = phi i32 [ 0, %entry ], [ %tmp12, %bb1 ], [ %tmp12, %cond_true ], [ 0, %bb59 ]		; <i32> [#uses=5]
-	%move.2 = phi i32 [ 0, %entry ], [ 1, %cond_true ], [ %move.2, %bb1 ], [ 0, %bb59 ]		; <i32> [#uses=2]
-	%tmp52 = load i32* %tmp51		; <i32> [#uses=1]
-	%tmp53 = add i32 %tmp52, -1		; <i32> [#uses=1]
-	%tmp55 = icmp sgt i32 %tmp53, %i.1		; <i1> [#uses=1]
-	br i1 %tmp55, label %bb1, label %bb59
-
-bb59:		; preds = %bb49
-	%tmp61 = icmp eq i32 %move.2, 0		; <i1> [#uses=1]
-	br i1 %tmp61, label %return, label %bb49
-
-return:		; preds = %bb59
-	ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
deleted file mode 100644
index 7ba2a19..0000000
--- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; RUN: llc < %s 
-; PR1424
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-unknown-linux-gnueabi"
-	%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
-	%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
-	%struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 }
-	%struct.AVEvalExpr = type opaque
-	%struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*] }
-	%struct.AVOption = type opaque
-	%struct.AVPaletteControl = type { i32, [256 x i32] }
-	%struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] }
-	%struct.AVRational = type { i32, i32 }
-	%struct.BlockNode = type { i16, i16, i8, [3 x i8], i8, i8 }
-	%struct.DSPContext = type { void (i16*, i8*, i32)*, void (i16*, i8*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, i32 (i16*)*, void (i8*, i8*, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, void (i16*)*, i32 (i8*, i32)*, i32 (i8*, i32)*, [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], i32 (i8*, i16*, i32)*, [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [2 x void (i8*, i8*, i8*, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [8 x void (i8*, i8*, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [10 x void (i8*, i32, i32, i32, i32)*], [10 x void (i8*, i8*, i32, i32, i32, i32, i32)*], [2 x [16 x 
void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, [2 x [4 x i32 (i8*, i8*, i8*, i32, i32)*]], void (i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, void (i32*, i32*, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void ([4 x [4 x i16]]*, i8*, [40 x i8]*, [40 x [2 x i16]]*, i32, i32, i32, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32)*, void (float*, float*, i32)*, void (float*, float*, i32)*, void (float*, float*, float*, i32)*, void (float*, float*, float*, float*, i32, i32, i32)*, void (i16*, float*, i32)*, void (i16*)*, void (i16*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, [64 x i8], i32, i32 (i16*, i16*, i16*, i32)*, void (i16*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void ([4 x i16]*)*, void (i32*, i32*, i32*, i32*, i32*, i32*, i32)*, void (i32*, i32)*, void (i8*, i32, i8**, i32, i32, i32, i32, i32, %struct.slice_buffer*, i32, i8*)*, void (i8*, i32, i32)*, [4 x void (i8*, i32, i8*, i32, i32, i32)*], void (i16*)*, void (i16*, i32)*, void (i16*, i32)*, void (i16*, i32)*, void (i8*, i32)*, void (i8*, i32)*, [16 x void (i8*, i8*, i32, i32)*] }
-	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
-	%struct.GetBitContext = type { i8*, i8*, i32*, i32, i32, i32, i32 }
-	%struct.MJpegContext = type opaque
-	%struct.MotionEstContext = type { %struct.AVCodecContext*, i32, [4 x [2 x i32]], [4 x [2 x i32]], i8*, i8*, [2 x i8*], i8*, i32, i32*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x [4 x i8*]], [4 x [4 x i8*]], i32, i32, i32, i32, i32, [4 x void (i8*, i8*, i32, i32)*]*, [4 x void (i8*, i8*, i32, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [4097 x i8]*, i8*, i32 (%struct.MpegEncContext*, i32*, i32*, i32, i32, i32, i32, i32)* }
-	%struct.MpegEncContext = type { %struct.AVCodecContext*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Picture*, %struct.Picture**, %struct.Picture**, i32, i32, [8 x %struct.MpegEncContext*], %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture*, %struct.Picture*, %struct.Picture*, [3 x i8*], [3 x i32], i16*, [3 x i16*], [20 x i16], i32, i32, i8*, i8*, i8*, i8*, i8*, [16 x i16]*, [3 x [16 x i16]*], i32, i8*, i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, [5 x i32], i32, i32, i32, i32, %struct.DSPContext, i32, i32, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i8*], [2 x [2 x i8*]], i32, i32, i32, [2 x [4 x [2 x i32]]], [2 x [2 x i32]], [2 x [2 x [2 x i32]]], i8*, [2 x [64 x i16]], %struct.MotionEstContext, i32, i32, i32, i32, i32, i32, i16*, [6 x i32], [6 x i32], [3 x i8*], i32*, [64 x i16], [64 x i16], [64 x i16], [64 x i16], i32, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i8*, [8 x i32], [64 x i32]*, [64 x i32]*, [2 x [64 x i16]]*, [2 x [64 x i16]]*, [12 x i32], %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, [64 x i32]*, [2 x i32], [64 x i16]*, i8*, i64, i64, i32, i32, %struct.RateControlContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, i32, %struct.GetBitContext, i32, i32, i32, %struct.ParseContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i16, i16, i16, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [2 x i32]], [2 x [2 x i32]], [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, [3 x i32], %struct.MJpegContext*, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [65 x [65 x [2 x i32]]]]*, i32, i32, %struct.GetBitContext, i32, i32, i32, i8*, i32, [2 x [2 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, i32, i32, i32, i8*, i32, [12 x i16*], [64 x i16]*, [8 x [64 x i16]]*, i32 (%struct.MpegEncContext*, [64 x i16]*)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, void (%struct.MpegEncContext*, i16*)* }
-	%struct.ParseContext = type { i8*, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.Picture = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*], [3 x i8*], [2 x [2 x i16]*], i32*, [2 x i32], i32, i32, i32, i32, [2 x [16 x i32]], [2 x i32], i32, i32, i16*, i16*, i8*, i32*, i32 }
-	%struct.Plane = type { i32, i32, [8 x [4 x %struct.SubBand]] }
-	%struct.Predictor = type { double, double, double }
-	%struct.PutBitContext = type { i32, i32, i8*, i8*, i8* }
-	%struct.RangeCoder = type { i32, i32, i32, i32, [256 x i8], [256 x i8], i8*, i8*, i8* }
-	%struct.RateControlContext = type { %struct.FILE*, i32, %struct.RateControlEntry*, double, [5 x %struct.Predictor], double, double, double, double, double, [5 x double], i32, i32, [5 x i64], [5 x i64], [5 x i64], [5 x i64], [5 x i32], i32, i8*, float, i32, %struct.AVEvalExpr* }
-	%struct.RateControlEntry = type { i32, float, i32, i32, i32, i32, i32, i64, i32, float, i32, i32, i32, i32, i32, i32 }
-	%struct.RcOverride = type { i32, i32, i32, float }
-	%struct.ScanTable = type { i8*, [64 x i8], [64 x i8] }
-	%struct.SnowContext = type { %struct.AVCodecContext*, %struct.RangeCoder, %struct.DSPContext, %struct.AVFrame, %struct.AVFrame, %struct.AVFrame, [8 x %struct.AVFrame], %struct.AVFrame, [32 x i8], [4224 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [8 x [2 x i16]*], [8 x i32*], i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x %struct.Plane], %struct.BlockNode*, [1024 x i32], i32, %struct.slice_buffer, %struct.MpegEncContext }
-	%struct.SubBand = type { i32, i32, i32, i32, i32, i32*, i32, i32, i32, %struct.x_and_coeff*, %struct.SubBand*, [519 x [32 x i8]] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
-	%struct.slice_buffer = type { i32**, i32**, i32, i32, i32, i32, i32* }
-	%struct.x_and_coeff = type { i16, i16 }
-
-define fastcc void @iterative_me(%struct.SnowContext* %s) {
-entry:
-	%state = alloca [4224 x i8], align 8		; <[4224 x i8]*> [#uses=0]
-	%best_rd4233 = alloca i32, align 4		; <i32*> [#uses=0]
-	%tmp21 = getelementptr %struct.SnowContext* %s, i32 0, i32 36		; <i32*> [#uses=2]
-	br label %bb4198
-
-bb79:		; preds = %bb4189.preheader
-	br i1 false, label %cond_next239, label %cond_true
-
-cond_true:		; preds = %bb79
-	ret void
-
-cond_next239:		; preds = %bb79
-	%tmp286 = alloca i8, i32 0		; <i8*> [#uses=0]
-	ret void
-
-bb4198:		; preds = %bb4189.preheader, %entry
-	br i1 false, label %bb4189.preheader, label %bb4204
-
-bb4189.preheader:		; preds = %bb4198
-	br i1 false, label %bb79, label %bb4198
-
-bb4204:		; preds = %bb4198
-	br i1 false, label %bb4221, label %cond_next4213
-
-cond_next4213:		; preds = %bb4204
-	ret void
-
-bb4221:		; preds = %bb4204
-	br i1 false, label %bb5242.preheader, label %UnifiedReturnBlock
-
-bb5242.preheader:		; preds = %bb4221
-	br label %bb5242
-
-bb4231:		; preds = %bb5233
-	%tmp4254.sum = add i32 0, 1		; <i32> [#uses=2]
-	br i1 false, label %bb4559, label %cond_next4622
-
-bb4559:		; preds = %bb4231
-	ret void
-
-cond_next4622:		; preds = %bb4231
-	%tmp4637 = load i16* null		; <i16> [#uses=1]
-	%tmp46374638 = sext i16 %tmp4637 to i32		; <i32> [#uses=1]
-	%tmp4642 = load i16* null		; <i16> [#uses=1]
-	%tmp46424643 = sext i16 %tmp4642 to i32		; <i32> [#uses=1]
-	%tmp4648 = load i16* null		; <i16> [#uses=1]
-	%tmp46484649 = sext i16 %tmp4648 to i32		; <i32> [#uses=1]
-	%tmp4653 = getelementptr %struct.BlockNode* null, i32 %tmp4254.sum, i32 0		; <i16*> [#uses=1]
-	%tmp4654 = load i16* %tmp4653		; <i16> [#uses=1]
-	%tmp46544655 = sext i16 %tmp4654 to i32		; <i32> [#uses=1]
-	%tmp4644 = add i32 %tmp46374638, 2		; <i32> [#uses=1]
-	%tmp4650 = add i32 %tmp4644, %tmp46424643		; <i32> [#uses=1]
-	%tmp4656 = add i32 %tmp4650, %tmp46484649		; <i32> [#uses=1]
-	%tmp4657 = add i32 %tmp4656, %tmp46544655		; <i32> [#uses=2]
-	%tmp4658 = ashr i32 %tmp4657, 2		; <i32> [#uses=1]
-	%tmp4662 = load i16* null		; <i16> [#uses=1]
-	%tmp46624663 = sext i16 %tmp4662 to i32		; <i32> [#uses=1]
-	%tmp4672 = getelementptr %struct.BlockNode* null, i32 0, i32 1		; <i16*> [#uses=1]
-	%tmp4673 = load i16* %tmp4672		; <i16> [#uses=1]
-	%tmp46734674 = sext i16 %tmp4673 to i32		; <i32> [#uses=1]
-	%tmp4678 = getelementptr %struct.BlockNode* null, i32 %tmp4254.sum, i32 1		; <i16*> [#uses=1]
-	%tmp4679 = load i16* %tmp4678		; <i16> [#uses=1]
-	%tmp46794680 = sext i16 %tmp4679 to i32		; <i32> [#uses=1]
-	%tmp4669 = add i32 %tmp46624663, 2		; <i32> [#uses=1]
-	%tmp4675 = add i32 %tmp4669, 0		; <i32> [#uses=1]
-	%tmp4681 = add i32 %tmp4675, %tmp46734674		; <i32> [#uses=1]
-	%tmp4682 = add i32 %tmp4681, %tmp46794680		; <i32> [#uses=2]
-	%tmp4683 = ashr i32 %tmp4682, 2		; <i32> [#uses=1]
-	%tmp4703 = load i32* %tmp21		; <i32> [#uses=1]
-	%tmp4707 = shl i32 %tmp4703, 0		; <i32> [#uses=4]
-	%tmp4710 = load %struct.BlockNode** null		; <%struct.BlockNode*> [#uses=6]
-	%tmp4713 = mul i32 %tmp4707, %mb_y.4		; <i32> [#uses=1]
-	%tmp4715 = add i32 %tmp4713, %mb_x.7		; <i32> [#uses=7]
-	store i8 0, i8* null
-	store i8 0, i8* null
-	%tmp47594761 = bitcast %struct.BlockNode* null to i8*		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i32( i8* null, i8* %tmp47594761, i32 10, i32 0 )
-	%tmp4716.sum5775 = add i32 %tmp4715, 1		; <i32> [#uses=1]
-	%tmp4764 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4716.sum5775		; <%struct.BlockNode*> [#uses=1]
-	%tmp47644766 = bitcast %struct.BlockNode* %tmp4764 to i8*		; <i8*> [#uses=1]
-	%tmp4716.sum5774 = add i32 %tmp4715, %tmp4707		; <i32> [#uses=0]
-	%tmp47704772 = bitcast %struct.BlockNode* null to i8*		; <i8*> [#uses=1]
-	%tmp4774 = add i32 %tmp4707, 1		; <i32> [#uses=1]
-	%tmp4716.sum5773 = add i32 %tmp4774, %tmp4715		; <i32> [#uses=1]
-	%tmp4777 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4716.sum5773		; <%struct.BlockNode*> [#uses=1]
-	%tmp47774779 = bitcast %struct.BlockNode* %tmp4777 to i8*		; <i8*> [#uses=1]
-	%tmp4781 = icmp slt i32 %mb_x.7, 0		; <i1> [#uses=1]
-	%tmp4788 = or i1 %tmp4781, %tmp4784		; <i1> [#uses=2]
-	br i1 %tmp4788, label %cond_true4791, label %cond_next4794
-
-cond_true4791:		; preds = %cond_next4622
-	unreachable
-
-cond_next4794:		; preds = %cond_next4622
-	%tmp4797 = icmp slt i32 %mb_x.7, %tmp4707		; <i1> [#uses=1]
-	br i1 %tmp4797, label %cond_next4803, label %cond_true4800
-
-cond_true4800:		; preds = %cond_next4794
-	unreachable
-
-cond_next4803:		; preds = %cond_next4794
-	%tmp4825 = ashr i32 %tmp4657, 12		; <i32> [#uses=1]
-	shl i32 %tmp4682, 4		; <i32>:0 [#uses=1]
-	%tmp4828 = and i32 %0, -64		; <i32> [#uses=1]
-	%tmp4831 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 2		; <i8*> [#uses=0]
-	%tmp4826 = add i32 %tmp4828, %tmp4825		; <i32> [#uses=1]
-	%tmp4829 = add i32 %tmp4826, 0		; <i32> [#uses=1]
-	%tmp4835 = add i32 %tmp4829, 0		; <i32> [#uses=1]
-	store i32 %tmp4835, i32* null
-	%tmp48534854 = trunc i32 %tmp4658 to i16		; <i16> [#uses=1]
-	%tmp4856 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 0		; <i16*> [#uses=1]
-	store i16 %tmp48534854, i16* %tmp4856
-	%tmp48574858 = trunc i32 %tmp4683 to i16		; <i16> [#uses=1]
-	%tmp4860 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 1		; <i16*> [#uses=1]
-	store i16 %tmp48574858, i16* %tmp4860
-	%tmp4866 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 4		; <i8*> [#uses=0]
-	br i1 false, label %bb4933, label %cond_false4906
-
-cond_false4906:		; preds = %cond_next4803
-	call void @llvm.memcpy.i32( i8* %tmp47594761, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp47644766, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp47704772, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp47774779, i8* null, i32 10, i32 0 )
-	br label %bb5215
-
-bb4933:		; preds = %bb5215, %cond_next4803
-	br i1 false, label %cond_true4944, label %bb5215
-
-cond_true4944:		; preds = %bb4933
-	%tmp4982 = load i32* %tmp21		; <i32> [#uses=1]
-	%tmp4986 = shl i32 %tmp4982, 0		; <i32> [#uses=2]
-	%tmp4992 = mul i32 %tmp4986, %mb_y.4		; <i32> [#uses=1]
-	%tmp4994 = add i32 %tmp4992, %mb_x.7		; <i32> [#uses=5]
-	%tmp4995.sum5765 = add i32 %tmp4994, 1		; <i32> [#uses=1]
-	%tmp5043 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5765		; <%struct.BlockNode*> [#uses=1]
-	%tmp50435045 = bitcast %struct.BlockNode* %tmp5043 to i8*		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i32( i8* null, i8* %tmp50435045, i32 10, i32 0 )
-	%tmp4995.sum5764 = add i32 %tmp4994, %tmp4986		; <i32> [#uses=1]
-	%tmp5049 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5764		; <%struct.BlockNode*> [#uses=1]
-	%tmp50495051 = bitcast %struct.BlockNode* %tmp5049 to i8*		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i32( i8* null, i8* %tmp50495051, i32 10, i32 0 )
-	%tmp4995.sum5763 = add i32 0, %tmp4994		; <i32> [#uses=1]
-	%tmp5056 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5763		; <%struct.BlockNode*> [#uses=1]
-	%tmp50565058 = bitcast %struct.BlockNode* %tmp5056 to i8*		; <i8*> [#uses=1]
-	br i1 %tmp4788, label %cond_true5070, label %cond_next5073
-
-cond_true5070:		; preds = %cond_true4944
-	unreachable
-
-cond_next5073:		; preds = %cond_true4944
-	%tmp5139 = getelementptr %struct.BlockNode* null, i32 %tmp4994, i32 1		; <i16*> [#uses=0]
-	%tmp5145 = getelementptr %struct.BlockNode* null, i32 %tmp4994, i32 4		; <i8*> [#uses=0]
-	call void @llvm.memcpy.i32( i8* %tmp50435045, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp50495051, i8* null, i32 10, i32 0 )
-	call void @llvm.memcpy.i32( i8* %tmp50565058, i8* null, i32 10, i32 0 )
-	br label %bb5215
-
-bb5215:		; preds = %cond_next5073, %bb4933, %cond_false4906
-	%i4232.3 = phi i32 [ 0, %cond_false4906 ], [ 0, %cond_next5073 ], [ 0, %bb4933 ]		; <i32> [#uses=1]
-	%tmp5217 = icmp slt i32 %i4232.3, 4		; <i1> [#uses=1]
-	br i1 %tmp5217, label %bb4933, label %bb5220
-
-bb5220:		; preds = %bb5215
-	br i1 false, label %bb5230, label %cond_true5226
-
-cond_true5226:		; preds = %bb5220
-	ret void
-
-bb5230:		; preds = %bb5220
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
-	br label %bb5233
-
-bb5233:		; preds = %bb5233.preheader, %bb5230
-	%indvar = phi i32 [ 0, %bb5233.preheader ], [ %indvar.next, %bb5230 ]		; <i32> [#uses=2]
-	%mb_x.7 = shl i32 %indvar, 1		; <i32> [#uses=4]
-	br i1 false, label %bb4231, label %bb5239
-
-bb5239:		; preds = %bb5233
-	%indvar.next37882 = add i32 %indvar37881, 1		; <i32> [#uses=1]
-	br label %bb5242
-
-bb5242:		; preds = %bb5239, %bb5242.preheader
-	%indvar37881 = phi i32 [ 0, %bb5242.preheader ], [ %indvar.next37882, %bb5239 ]		; <i32> [#uses=2]
-	%mb_y.4 = shl i32 %indvar37881, 1		; <i32> [#uses=3]
-	br i1 false, label %bb5233.preheader, label %bb5248
-
-bb5233.preheader:		; preds = %bb5242
-	%tmp4784 = icmp slt i32 %mb_y.4, 0		; <i1> [#uses=1]
-	br label %bb5233
-
-bb5248:		; preds = %bb5242
-	ret void
-
-UnifiedReturnBlock:		; preds = %bb4221
-	ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 0ec17ae..377bbd2 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -1,16 +1,15 @@
 ; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2
 ; rdar://6653182
 
-	%struct.ggBRDF = type { i32 (...)** }
-	%struct.ggPoint2 = type { [2 x double] }
-	%struct.ggPoint3 = type { [3 x double] }
-	%struct.ggSpectrum = type { [8 x float] }
-	%struct.ggSphere = type { %struct.ggPoint3, double }
-	%struct.mrDiffuseAreaSphereLuminaire = type { %struct.mrSphere, %struct.ggSpectrum }
-	%struct.mrDiffuseCosineSphereLuminaire = type { %struct.mrDiffuseAreaSphereLuminaire }
-	%struct.mrSphere = type { %struct.ggBRDF, %struct.ggSphere }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+%struct.ggBRDF = type { i32 (...)** }
+%struct.ggPoint2 = type { [2 x double] }
+%struct.ggPoint3 = type { [3 x double] }
+%struct.ggSpectrum = type { [8 x float] }
+%struct.ggSphere = type { %struct.ggPoint3, double }
+%struct.mrDiffuseAreaSphereLuminaire = type { %struct.mrSphere, %struct.ggSpectrum }
+%struct.mrDiffuseCosineSphereLuminaire = type { %struct.mrDiffuseAreaSphereLuminaire }
+%struct.mrSphere = type { %struct.ggBRDF, %struct.ggSphere }
 
 declare double @llvm.sqrt.f64(double) nounwind readonly
 
@@ -20,59 +19,61 @@ declare double @acos(double) nounwind readonly
 
 define i32 @_ZNK34mrDiffuseSolidAngleSphereLuminaire18selectVisiblePointERK8ggPoint3RK9ggVector3RK8ggPoint2dRS0_Rd(%struct.mrDiffuseCosineSphereLuminaire* nocapture %this, %struct.ggPoint3* nocapture %x, %struct.ggPoint3* nocapture %unnamed_arg, %struct.ggPoint2* nocapture %uv, double %unnamed_arg2, %struct.ggPoint3* nocapture %on_light, double* nocapture %invProb) nounwind {
 entry:
-	%0 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind		; <double> [#uses=4]
-	%1 = fcmp ult double 0.000000e+00, %0		; <i1> [#uses=1]
-	br i1 %1, label %bb3, label %bb7
+  %0 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind
+  %1 = fcmp ult double 0.000000e+00, %0
+  br i1 %1, label %bb3, label %bb7
 
-bb3:		; preds = %entry
-	%2 = fdiv double 1.000000e+00, 0.000000e+00		; <double> [#uses=1]
-	%3 = fmul double 0.000000e+00, %2		; <double> [#uses=2]
-	%4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind		; <double> [#uses=1]
-	%5 = fdiv double 1.000000e+00, %4		; <double> [#uses=2]
-	%6 = fmul double %3, %5		; <double> [#uses=2]
-	%7 = fmul double 0.000000e+00, %5		; <double> [#uses=2]
-	%8 = fmul double %3, %7		; <double> [#uses=1]
-	%9 = fsub double %8, 0.000000e+00		; <double> [#uses=1]
-	%10 = fmul double 0.000000e+00, %6		; <double> [#uses=1]
-	%11 = fsub double 0.000000e+00, %10		; <double> [#uses=1]
-	%12 = fsub double -0.000000e+00, %11		; <double> [#uses=1]
-	%13 = fmul double %0, %0		; <double> [#uses=2]
-	%14 = fsub double %13, 0.000000e+00		; <double> [#uses=1]
-	%15 = call double @llvm.sqrt.f64(double %14)		; <double> [#uses=1]
-	%16 = fmul double 0.000000e+00, %15		; <double> [#uses=1]
-	%17 = fdiv double %16, %0		; <double> [#uses=1]
-	%18 = fadd double 0.000000e+00, %17		; <double> [#uses=1]
-	%19 = call double @acos(double %18) nounwind readonly		; <double> [#uses=1]
-	%20 = load double* null, align 4		; <double> [#uses=1]
-	%21 = fmul double %20, 0x401921FB54442D18		; <double> [#uses=1]
-	%22 = call double @sin(double %19) nounwind readonly		; <double> [#uses=2]
-	%23 = fmul double %22, 0.000000e+00		; <double> [#uses=2]
-	%24 = fmul double %6, %23		; <double> [#uses=1]
-	%25 = fmul double %7, %23		; <double> [#uses=1]
-	%26 = call double @sin(double %21) nounwind readonly		; <double> [#uses=1]
-	%27 = fmul double %22, %26		; <double> [#uses=2]
-	%28 = fmul double %9, %27		; <double> [#uses=1]
-	%29 = fmul double %27, %12		; <double> [#uses=1]
-	%30 = fadd double %24, %28		; <double> [#uses=1]
-	%31 = fadd double 0.000000e+00, %29		; <double> [#uses=1]
-	%32 = fadd double %25, 0.000000e+00		; <double> [#uses=1]
-	%33 = fadd double %30, 0.000000e+00		; <double> [#uses=1]
-	%34 = fadd double %31, 0.000000e+00		; <double> [#uses=1]
-	%35 = fadd double %32, 0.000000e+00		; <double> [#uses=1]
-	%36 = bitcast %struct.ggPoint3* %x to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32(i8* null, i8* %36, i32 24, i32 4) nounwind
-	store double %33, double* null, align 8
-	br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i
+bb3:                                              ; preds = %entry
+  %2 = fdiv double 1.000000e+00, 0.000000e+00
+  %3 = fmul double 0.000000e+00, %2
+  %4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind
+  %5 = fdiv double 1.000000e+00, %4
+  %6 = fmul double %3, %5
+  %7 = fmul double 0.000000e+00, %5
+  %8 = fmul double %3, %7
+  %9 = fsub double %8, 0.000000e+00
+  %10 = fmul double 0.000000e+00, %6
+  %11 = fsub double 0.000000e+00, %10
+  %12 = fsub double -0.000000e+00, %11
+  %13 = fmul double %0, %0
+  %14 = fsub double %13, 0.000000e+00
+  %15 = call double @llvm.sqrt.f64(double %14)
+  %16 = fmul double 0.000000e+00, %15
+  %17 = fdiv double %16, %0
+  %18 = fadd double 0.000000e+00, %17
+  %19 = call double @acos(double %18) nounwind readonly
+  %20 = load double* null, align 4
+  %21 = fmul double %20, 0x401921FB54442D18
+  %22 = call double @sin(double %19) nounwind readonly
+  %23 = fmul double %22, 0.000000e+00
+  %24 = fmul double %6, %23
+  %25 = fmul double %7, %23
+  %26 = call double @sin(double %21) nounwind readonly
+  %27 = fmul double %22, %26
+  %28 = fmul double %9, %27
+  %29 = fmul double %27, %12
+  %30 = fadd double %24, %28
+  %31 = fadd double 0.000000e+00, %29
+  %32 = fadd double %25, 0.000000e+00
+  %33 = fadd double %30, 0.000000e+00
+  %34 = fadd double %31, 0.000000e+00
+  %35 = fadd double %32, 0.000000e+00
+  %36 = bitcast %struct.ggPoint3* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false)
+  store double %33, double* null, align 8
+  br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i
 
-bb5.i.i.i:		; preds = %bb3
-	unreachable
+bb5.i.i.i:                                        ; preds = %bb3
+  unreachable
 
-_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit:		; preds = %bb3
-	%37 = fsub double %13, 0.000000e+00		; <double> [#uses=0]
-	%38 = fsub double -0.000000e+00, %34		; <double> [#uses=0]
-	%39 = fsub double -0.000000e+00, %35		; <double> [#uses=0]
-	ret i32 1
+_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit: ; preds = %bb3
+  %37 = fsub double %13, 0.000000e+00
+  %38 = fsub double -0.000000e+00, %34
+  %39 = fsub double -0.000000e+00, %35
+  ret i32 1
 
-bb7:		; preds = %entry
-	ret i32 0
+bb7:                                              ; preds = %entry
+  ret i32 0
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
deleted file mode 100644
index 27888d7..0000000
--- a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin
-
-	type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] }		; type %0
-	type { i32, %struct.D_Reduction** }		; type %1
-	type { i32, %struct.D_RightEpsilonHint* }		; type %2
-	type { i32, %struct.D_ErrorRecoveryHint* }		; type %3
-	type { i32, i32, %struct.D_Reduction**, [3 x %struct.D_Reduction*] }		; type %4
-	%struct.D_ErrorRecoveryHint = type { i16, i16, i8* }
-	%struct.D_ParseNode = type { i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i8*, i8* }
-	%struct.D_Parser = type { i8*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, %struct.D_Scope*, void (%struct.D_Parser*)*, %struct.D_ParseNode* (%struct.D_Parser*, i32, %struct.D_ParseNode**)*, void (%struct.D_ParseNode*)*, %struct.d_loc_t, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.D_ParserTables = type { i32, %struct.D_State*, i16*, i32, i32, %struct.D_Symbol*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i32, %struct.D_Pass*, i32 }
-	%struct.D_Pass = type { i8*, i32, i32, i32 }
-	%struct.D_Reduction = type { i16, i16, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i16, i16, i32, i32, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)** }
-	%struct.D_RightEpsilonHint = type { i16, i16, %struct.D_Reduction* }
-	%struct.D_Scope = type { i8, %struct.D_Sym*, %struct.D_SymHash*, %struct.D_Sym*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope* }
-	%struct.D_Shift = type { i16, i8, i8, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)* }
-	%struct.D_State = type { i8*, i32, %1, %2, %3, %struct.D_Shift**, i32 (i8**, i32*, i32*, i16*, i32*, i8*, i32*)*, i8*, i8, i8, i8, i8*, %struct.D_Shift***, i32 }
-	%struct.D_Sym = type { i8*, i32, i32, %struct.D_Sym*, %struct.D_Sym*, i32 }
-	%struct.D_SymHash = type { i32, i32, %0 }
-	%struct.D_Symbol = type { i32, i8*, i32 }
-	%struct.PNode = type { i32, i32, i32, i32, %struct.D_Reduction*, %struct.D_Shift*, i32, %struct.VecPNode, i32, i8, i8, %struct.PNode*, %struct.PNode*, %struct.PNode*, %struct.PNode*, i8*, i8*, %struct.D_Scope*, i8*, %struct.D_ParseNode }
-	%struct.PNodeHash = type { %struct.PNode**, i32, i32, i32, %struct.PNode* }
-	%struct.Parser = type { %struct.D_Parser, i8*, i8*, %struct.D_ParserTables*, i32, i32, i32, i32, i32, i32, i32, %struct.PNodeHash, %struct.SNodeHash, %struct.Reduction*, %struct.Shift*, %struct.D_Scope*, %struct.SNode*, i32, %struct.Reduction*, %struct.Shift*, i32, %struct.PNode*, %struct.SNode*, %struct.ZNode*, %4, %struct.ShiftResult*, %struct.D_Shift, %struct.Parser*, i8* }
-	%struct.Reduction = type { %struct.ZNode*, %struct.SNode*, %struct.D_Reduction*, %struct.SNode*, i32, %struct.Reduction* }
-	%struct.SNode = type { %struct.D_State*, %struct.D_Scope*, i8*, %struct.d_loc_t, i32, %struct.PNode*, %struct.VecZNode, i32, %struct.SNode*, %struct.SNode* }
-	%struct.SNodeHash = type { %struct.SNode**, i32, i32, i32, %struct.SNode*, %struct.SNode* }
-	%struct.Shift = type { %struct.SNode*, %struct.Shift* }
-	%struct.ShiftResult = type { %struct.D_Shift*, %struct.d_loc_t }
-	%struct.VecPNode = type { i32, i32, %struct.PNode**, [3 x %struct.PNode*] }
-	%struct.VecSNode = type { i32, i32, %struct.SNode**, [3 x %struct.SNode*] }
-	%struct.VecZNode = type { i32, i32, %struct.ZNode**, [3 x %struct.ZNode*] }
-	%struct.ZNode = type { %struct.PNode*, %struct.VecSNode }
-	%struct.d_loc_t = type { i8*, i8*, i32, i32, i32 }
-
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
-define fastcc i32 @exhaustive_parse(%struct.Parser* %p, i32 %state) nounwind {
-entry:
-	store i8* undef, i8** undef, align 4
-	%0 = getelementptr %struct.Parser* %p, i32 0, i32 0, i32 6		; <%struct.d_loc_t*> [#uses=1]
-	%1 = bitcast %struct.d_loc_t* %0 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32(i8* undef, i8* %1, i32 20, i32 4)
-	br label %bb10
-
-bb10:		; preds = %bb30, %bb29, %bb26, %entry
-	br i1 undef, label %bb18, label %bb20
-
-bb18:		; preds = %bb10
-	br i1 undef, label %bb20, label %bb19
-
-bb19:		; preds = %bb18
-	br label %bb20
-
-bb20:		; preds = %bb19, %bb18, %bb10
-	br i1 undef, label %bb21, label %bb22
-
-bb21:		; preds = %bb20
-	unreachable
-
-bb22:		; preds = %bb20
-	br i1 undef, label %bb24, label %bb26
-
-bb24:		; preds = %bb22
-	unreachable
-
-bb26:		; preds = %bb22
-	br i1 undef, label %bb10, label %bb29
-
-bb29:		; preds = %bb26
-	br i1 undef, label %bb10, label %bb30
-
-bb30:		; preds = %bb29
-	br i1 undef, label %bb31, label %bb10
-
-bb31:		; preds = %bb30
-	unreachable
-}
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 397eba4..8bde748 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -1,32 +1,35 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s
 
+; CHECK: L_LSDA_0:
+
+
 %struct.A = type { i32* }
 
 define void @"\01-[MyFunction Name:]"() {
 entry:
-  %save_filt.1 = alloca i32                       ; <i32*> [#uses=2]
-  %save_eptr.0 = alloca i8*                       ; <i8**> [#uses=2]
-  %a = alloca %struct.A                           ; <%struct.A*> [#uses=3]
-  %eh_exception = alloca i8*                      ; <i8**> [#uses=5]
-  %eh_selector = alloca i32                       ; <i32*> [#uses=3]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call  void @_ZN1AC1Ev(%struct.A* %a)
-  invoke  void @_Z3barv()
+  %save_filt.1 = alloca i32
+  %save_eptr.0 = alloca i8*
+  %a = alloca %struct.A
+  %eh_exception = alloca i8*
+  %eh_selector = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  call void @_ZN1AC1Ev(%struct.A* %a)
+  invoke void @_Z3barv()
           to label %invcont unwind label %lpad
 
 invcont:                                          ; preds = %entry
-  call  void @_ZN1AD1Ev(%struct.A* %a) nounwind
+  call void @_ZN1AD1Ev(%struct.A* %a) nounwind
   br label %return
 
 bb:                                               ; preds = %ppad
-  %eh_select = load i32* %eh_selector             ; <i32> [#uses=1]
+  %eh_select = load i32* %eh_selector
   store i32 %eh_select, i32* %save_filt.1, align 4
-  %eh_value = load i8** %eh_exception             ; <i8*> [#uses=1]
+  %eh_value = load i8** %eh_exception
   store i8* %eh_value, i8** %save_eptr.0, align 4
-  call  void @_ZN1AD1Ev(%struct.A* %a) nounwind
-  %0 = load i8** %save_eptr.0, align 4            ; <i8*> [#uses=1]
+  call void @_ZN1AD1Ev(%struct.A* %a) nounwind
+  %0 = load i8** %save_eptr.0, align 4
   store i8* %0, i8** %eh_exception, align 4
-  %1 = load i32* %save_filt.1, align 4            ; <i32> [#uses=1]
+  %1 = load i32* %save_filt.1, align 4
   store i32 %1, i32* %eh_selector, align 4
   br label %Unwind
 
@@ -34,10 +37,10 @@ return:                                           ; preds = %invcont
   ret void
 
 lpad:                                             ; preds = %entry
-  %eh_ptr = call i8* @llvm.eh.exception()         ; <i8*> [#uses=1]
+  %eh_ptr = call i8* @llvm.eh.exception()
   store i8* %eh_ptr, i8** %eh_exception
-  %eh_ptr1 = load i8** %eh_exception              ; <i8*> [#uses=1]
-  %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) ; <i32> [#uses=1]
+  %eh_ptr1 = load i8** %eh_exception
+  %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
   store i32 %eh_select2, i32* %eh_selector
   br label %ppad
 
@@ -45,20 +48,20 @@ ppad:                                             ; preds = %lpad
   br label %bb
 
 Unwind:                                           ; preds = %bb
-  %eh_ptr3 = load i8** %eh_exception              ; <i8*> [#uses=1]
-  call  void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
+  %eh_ptr3 = load i8** %eh_exception
+  call void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
   unreachable
 }
 
 define linkonce_odr void @_ZN1AC1Ev(%struct.A* %this) {
 entry:
-  %this_addr = alloca %struct.A*                  ; <%struct.A**> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %this_addr = alloca %struct.A*
+  %"alloca point" = bitcast i32 0 to i32
   store %struct.A* %this, %struct.A** %this_addr
-  %0 = call  i8* @_Znwm(i32 4)         ; <i8*> [#uses=1]
-  %1 = bitcast i8* %0 to i32*                     ; <i32*> [#uses=1]
-  %2 = load %struct.A** %this_addr, align 4       ; <%struct.A*> [#uses=1]
-  %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; <i32**> [#uses=1]
+  %0 = call i8* @_Znwm(i32 4)
+  %1 = bitcast i8* %0 to i32*
+  %2 = load %struct.A** %this_addr, align 4
+  %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0
   store i32* %1, i32** %3, align 4
   br label %return
 
@@ -70,14 +73,14 @@ declare i8* @_Znwm(i32)
 
 define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind {
 entry:
-  %this_addr = alloca %struct.A*                  ; <%struct.A**> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %this_addr = alloca %struct.A*
+  %"alloca point" = bitcast i32 0 to i32
   store %struct.A* %this, %struct.A** %this_addr
-  %0 = load %struct.A** %this_addr, align 4       ; <%struct.A*> [#uses=1]
-  %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; <i32**> [#uses=1]
-  %2 = load i32** %1, align 4                     ; <i32*> [#uses=1]
-  %3 = bitcast i32* %2 to i8*                     ; <i8*> [#uses=1]
-  call  void @_ZdlPv(i8* %3) nounwind
+  %0 = load %struct.A** %this_addr, align 4
+  %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0
+  %2 = load i32** %1, align 4
+  %3 = bitcast i32* %2 to i8*
+  call void @_ZdlPv(i8* %3) nounwind
   br label %bb
 
 bb:                                               ; preds = %entry
@@ -86,17 +89,16 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %bb
   ret void
 }
-;CHECK: L_LSDA_0:
 
 declare void @_ZdlPv(i8*) nounwind
 
 declare void @_Z3barv()
 
-declare i8* @llvm.eh.exception() nounwind
+declare i8* @llvm.eh.exception() nounwind readonly
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
 
-declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gxx_personality_sj0(...)
 
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index 92e2d13..de2820e 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,48 +1,48 @@
 ; RUN: llc < %s -march=arm
 
-	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
-	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+%struct.comment = type { i8**, i32*, i32, i8* }
+%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
+
+@str215 = external global [2 x i8]
 
 define void @t1(%struct.state* %v) {
-	%tmp6 = load i32* null
-	%tmp8 = alloca float, i32 %tmp6
-	store i32 1, i32* null
-	br i1 false, label %bb123.preheader, label %return
-
-bb123.preheader:
-	br i1 false, label %bb43, label %return
-
-bb43:
-	call fastcc void @f1( float* %tmp8, float* null, i32 0 )
-	%tmp70 = load i32* null
-	%tmp85 = getelementptr float* %tmp8, i32 0
-	call fastcc void @f2( float* null, float* null, float* %tmp85, i32 %tmp70 )
-	ret void
-
-return:
-	ret void
+  %tmp6 = load i32* null
+  %tmp8 = alloca float, i32 %tmp6
+  store i32 1, i32* null
+  br i1 false, label %bb123.preheader, label %return
+
+bb123.preheader:                                  ; preds = %0
+  br i1 false, label %bb43, label %return
+
+bb43:                                             ; preds = %bb123.preheader
+  call fastcc void @f1(float* %tmp8, float* null, i32 0)
+  %tmp70 = load i32* null
+  %tmp85 = getelementptr float* %tmp8, i32 0
+  call fastcc void @f2(float* null, float* null, float* %tmp85, i32 %tmp70)
+  ret void
+
+return:                                           ; preds = %bb123.preheader, %0
+  ret void
 }
 
 declare fastcc void @f1(float*, float*, i32)
 
 declare fastcc void @f2(float*, float*, float*, i32)
 
-	%struct.comment = type { i8**, i32*, i32, i8* }
-@str215 = external global [2 x i8]
-
 define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
-	%tmp1 = call i32 @strlen( i8* %tag )
-	%tmp3 = call i32 @strlen( i8* %contents )
-	%tmp4 = add i32 %tmp1, 2
-	%tmp5 = add i32 %tmp4, %tmp3
-	%tmp6 = alloca i8, i32 %tmp5
-	%tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
-	%tmp6.len = call i32 @strlen( i8* %tmp6 )
-	%tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
-	call void @llvm.memcpy.i32( i8* %tmp6.indexed, i8* getelementptr ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1 )
-	%tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
-	call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
-	ret void
+  %tmp1 = call i32 @strlen(i8* %tag)
+  %tmp3 = call i32 @strlen(i8* %contents)
+  %tmp4 = add i32 %tmp1, 2
+  %tmp5 = add i32 %tmp4, %tmp3
+  %tmp6 = alloca i8, i32 %tmp5
+  %tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag)
+  %tmp6.len = call i32 @strlen(i8* %tmp6)
+  %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
+  %tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents)
+  call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6)
+  ret void
 }
 
 declare i32 @strlen(i8*)
@@ -51,6 +51,6 @@ declare i8* @strcat(i8*, i8*)
 
 declare fastcc void @comment_add(%struct.comment*, i8*)
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
 declare i8* @strcpy(i8*, i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 5bae037..30b9f59 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -6,13 +6,14 @@
 ; CHECK: ldrb
 
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+
 @src = external global %struct.x
 @dst = external global %struct.x
 
 define i32 @t() {
 entry:
-	call void @llvm.memcpy.i32( i8* getelementptr (%struct.x* @dst, i32 0, i32 0), i8* getelementptr (%struct.x* @src, i32 0, i32 0), i32 11, i32 8 )
-	ret i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
+  ret i32 0
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 757364b..aeda022 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -9,24 +9,21 @@ entry:
 
         ; CHECK: memmove
         ; EABI: __aeabi_memmove
-        call void @llvm.memmove.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
 
         ; CHECK: memcpy
         ; EABI: __aeabi_memcpy
-        call void @llvm.memcpy.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
 
         ; EABI memset swaps arguments
         ; CHECK: mov r1, #0
         ; CHECK: memset
         ; EABI: mov r2, #0
         ; EABI: __aeabi_memset
-        call void @llvm.memset.i32( i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0 )
+        call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
         unreachable
 }
 
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
deleted file mode 100644
index 733202c..0000000
--- a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s
-; Test that llvm.memcpy works with a i64 length operand on all targets.
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-define void @l12_l94_bc_divide_endif_2E_3_2E_ce() {
-newFuncRoot:
-        tail call void @llvm.memcpy.i64( i8* null, i8* null, i64 0, i32 1 )
-        unreachable
-}
-
diff --git a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
deleted file mode 100644
index 3090857..0000000
--- a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
+++ /dev/null
@@ -1,157 +0,0 @@
-; RUN: llc < %s -o -
-
-	%struct.RETURN = type { i32, i32 }
-	%struct.ada__finalization__controlled = type { %struct.system__finalization_root__root_controlled }
-	%struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
-	%struct.ada__strings__unbounded__string_access = type { i8*, %struct.RETURN* }
-	%struct.ada__strings__unbounded__unbounded_string = type { %struct.ada__finalization__controlled, %struct.ada__strings__unbounded__string_access, i32 }
-	%struct.ada__tags__dispatch_table = type { [1 x i32] }
-	%struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* }
-	%struct.system__finalization_root__root_controlled = type { %struct.ada__streams__root_stream_type, %struct.system__finalization_root__root_controlled*, %struct.system__finalization_root__root_controlled* }
-	%struct.system__standard_library__exception_data = type { i8, i8, i32, i32, %struct.system__standard_library__exception_data*, i32, void ()* }
-@C.495.7639 = internal constant %struct.RETURN { i32 1, i32 16 }		; <%struct.RETURN*> [#uses=1]
-@ada__strings__index_error = external global %struct.exception		; <%struct.exception*> [#uses=1]
-@.str5 = internal constant [16 x i8] c"a-strunb.adb:690"		; <[16 x i8]*> [#uses=1]
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @ada__strings__unbounded__realloc_for_chunk(%struct.ada__strings__unbounded__unbounded_string*, i32)
-
-declare void @__gnat_raise_exception(%struct.system__standard_library__exception_data*, i64)
-
-define void @ada__strings__unbounded__insert__2(%struct.ada__strings__unbounded__unbounded_string* %source, i32 %before, i64 %new_item.0.0) {
-entry:
-	%tmp24636 = lshr i64 %new_item.0.0, 32		; <i64> [#uses=1]
-	%tmp24637 = trunc i64 %tmp24636 to i32		; <i32> [#uses=1]
-	%tmp24638 = inttoptr i32 %tmp24637 to %struct.RETURN*		; <%struct.RETURN*> [#uses=2]
-	%tmp25 = getelementptr %struct.RETURN* %tmp24638, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp26 = load i32* %tmp25, align 4		; <i32> [#uses=1]
-	%tmp29 = getelementptr %struct.RETURN* %tmp24638, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp30 = load i32* %tmp29, align 4		; <i32> [#uses=1]
-	%tmp63 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 1, i32 1		; <%struct.RETURN**> [#uses=5]
-	%tmp64 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp65 = getelementptr %struct.RETURN* %tmp64, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp66 = load i32* %tmp65, align 4		; <i32> [#uses=1]
-	%tmp67 = icmp sgt i32 %tmp66, %before		; <i1> [#uses=1]
-	br i1 %tmp67, label %bb77, label %bb
-
-bb:		; preds = %entry
-	%tmp71 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 2		; <i32*> [#uses=4]
-	%tmp72 = load i32* %tmp71, align 4		; <i32> [#uses=1]
-	%tmp73 = add i32 %tmp72, 1		; <i32> [#uses=1]
-	%tmp74 = icmp slt i32 %tmp73, %before		; <i1> [#uses=1]
-	br i1 %tmp74, label %bb77, label %bb84
-
-bb77:		; preds = %bb, %entry
-	tail call void @__gnat_raise_exception( %struct.system__standard_library__exception_data* bitcast (%struct.exception* @ada__strings__index_error to %struct.system__standard_library__exception_data*), i64 or (i64 zext (i32 ptrtoint ([16 x i8]* @.str5 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.RETURN* @C.495.7639 to i32) to i64), i64 32)) )
-	unreachable
-
-bb84:		; preds = %bb
-	%tmp93 = sub i32 %tmp30, %tmp26		; <i32> [#uses=2]
-	%tmp9394 = sext i32 %tmp93 to i36		; <i36> [#uses=1]
-	%tmp95 = shl i36 %tmp9394, 3		; <i36> [#uses=1]
-	%tmp96 = add i36 %tmp95, 8		; <i36> [#uses=2]
-	%tmp97 = icmp sgt i36 %tmp96, -1		; <i1> [#uses=1]
-	%tmp100 = select i1 %tmp97, i36 %tmp96, i36 0		; <i36> [#uses=2]
-	%tmp101 = icmp slt i36 %tmp100, 17179869177		; <i1> [#uses=1]
-	%tmp100.cast = trunc i36 %tmp100 to i32		; <i32> [#uses=1]
-	%min102 = select i1 %tmp101, i32 %tmp100.cast, i32 -8		; <i32> [#uses=1]
-	tail call void @ada__strings__unbounded__realloc_for_chunk( %struct.ada__strings__unbounded__unbounded_string* %source, i32 %min102 )
-	%tmp148 = load i32* %tmp71, align 4		; <i32> [#uses=4]
-	%tmp152 = add i32 %tmp93, 1		; <i32> [#uses=2]
-	%tmp153 = icmp sgt i32 %tmp152, -1		; <i1> [#uses=1]
-	%max154 = select i1 %tmp153, i32 %tmp152, i32 0		; <i32> [#uses=5]
-	%tmp155 = add i32 %tmp148, %max154		; <i32> [#uses=5]
-	%tmp315 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 1, i32 0		; <i8**> [#uses=4]
-	%tmp328 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp329 = getelementptr %struct.RETURN* %tmp328, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp330 = load i32* %tmp329, align 4		; <i32> [#uses=4]
-	%tmp324 = add i32 %max154, %before		; <i32> [#uses=3]
-	%tmp331 = sub i32 %tmp324, %tmp330		; <i32> [#uses=1]
-	%tmp349 = sub i32 %before, %tmp330		; <i32> [#uses=1]
-	%tmp356 = icmp sgt i32 %tmp331, %tmp349		; <i1> [#uses=1]
-	%tmp431 = icmp sgt i32 %tmp324, %tmp155		; <i1> [#uses=2]
-	br i1 %tmp356, label %bb420, label %bb359
-
-bb359:		; preds = %bb84
-	br i1 %tmp431, label %bb481, label %bb382
-
-bb382:		; preds = %bb382, %bb359
-	%indvar = phi i32 [ 0, %bb359 ], [ %indvar.next, %bb382 ]		; <i32> [#uses=2]
-	%max379.pn = phi i32 [ %max154, %bb359 ], [ %L492b.0, %bb382 ]		; <i32> [#uses=1]
-	%before.pn = phi i32 [ %before, %bb359 ], [ 1, %bb382 ]		; <i32> [#uses=1]
-	%L492b.0 = add i32 %before.pn, %max379.pn		; <i32> [#uses=3]
-	%tmp386 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp387 = getelementptr %struct.RETURN* %tmp386, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp388 = load i32* %tmp387, align 4		; <i32> [#uses=2]
-	%tmp392 = load i8** %tmp315, align 4		; <i8*> [#uses=2]
-	%R493b.0 = add i32 %indvar, %before		; <i32> [#uses=1]
-	%tmp405 = sub i32 %R493b.0, %tmp388		; <i32> [#uses=1]
-	%tmp406 = getelementptr i8* %tmp392, i32 %tmp405		; <i8*> [#uses=1]
-	%tmp407 = load i8* %tmp406, align 1		; <i8> [#uses=1]
-	%tmp408 = sub i32 %L492b.0, %tmp388		; <i32> [#uses=1]
-	%tmp409 = getelementptr i8* %tmp392, i32 %tmp408		; <i8*> [#uses=1]
-	store i8 %tmp407, i8* %tmp409, align 1
-	%tmp414 = icmp eq i32 %L492b.0, %tmp155		; <i1> [#uses=1]
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
-	br i1 %tmp414, label %bb481, label %bb382
-
-bb420:		; preds = %bb84
-	br i1 %tmp431, label %bb481, label %bb436.preheader
-
-bb436.preheader:		; preds = %bb420
-	%tmp4468 = load i8** %tmp315, align 4		; <i8*> [#uses=2]
-	%tmp4599 = sub i32 %tmp148, %tmp330		; <i32> [#uses=1]
-	%tmp46010 = getelementptr i8* %tmp4468, i32 %tmp4599		; <i8*> [#uses=1]
-	%tmp46111 = load i8* %tmp46010, align 1		; <i8> [#uses=1]
-	%tmp46212 = sub i32 %tmp155, %tmp330		; <i32> [#uses=1]
-	%tmp46313 = getelementptr i8* %tmp4468, i32 %tmp46212		; <i8*> [#uses=1]
-	store i8 %tmp46111, i8* %tmp46313, align 1
-	%exitcond14 = icmp eq i32 %tmp155, %tmp324		; <i1> [#uses=1]
-	br i1 %exitcond14, label %bb481, label %bb.nph
-
-bb.nph:		; preds = %bb436.preheader
-	%tmp5 = sub i32 %tmp148, %before		; <i32> [#uses=1]
-	br label %bb478
-
-bb478:		; preds = %bb478, %bb.nph
-	%indvar6422 = phi i32 [ 0, %bb.nph ], [ %indvar.next643, %bb478 ]		; <i32> [#uses=1]
-	%indvar.next643 = add i32 %indvar6422, 1		; <i32> [#uses=4]
-	%L490b.0 = sub i32 %tmp155, %indvar.next643		; <i32> [#uses=1]
-	%R491b.0 = sub i32 %tmp148, %indvar.next643		; <i32> [#uses=1]
-	%tmp440 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp441 = getelementptr %struct.RETURN* %tmp440, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp442 = load i32* %tmp441, align 4		; <i32> [#uses=2]
-	%tmp446 = load i8** %tmp315, align 4		; <i8*> [#uses=2]
-	%tmp459 = sub i32 %R491b.0, %tmp442		; <i32> [#uses=1]
-	%tmp460 = getelementptr i8* %tmp446, i32 %tmp459		; <i8*> [#uses=1]
-	%tmp461 = load i8* %tmp460, align 1		; <i8> [#uses=1]
-	%tmp462 = sub i32 %L490b.0, %tmp442		; <i32> [#uses=1]
-	%tmp463 = getelementptr i8* %tmp446, i32 %tmp462		; <i8*> [#uses=1]
-	store i8 %tmp461, i8* %tmp463, align 1
-	%exitcond = icmp eq i32 %indvar.next643, %tmp5		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb481, label %bb478
-
-bb481:		; preds = %bb478, %bb436.preheader, %bb420, %bb382, %bb359
-	%tmp577 = add i32 %before, -1		; <i32> [#uses=3]
-	%tmp578 = add i32 %max154, %tmp577		; <i32> [#uses=2]
-	%tmp581 = icmp sge i32 %tmp578, %tmp577		; <i1> [#uses=1]
-	%max582 = select i1 %tmp581, i32 %tmp578, i32 %tmp577		; <i32> [#uses=1]
-	%tmp584 = sub i32 %max582, %before		; <i32> [#uses=1]
-	%tmp585 = add i32 %tmp584, 1		; <i32> [#uses=2]
-	%tmp586 = icmp sgt i32 %tmp585, -1		; <i1> [#uses=1]
-	%max587 = select i1 %tmp586, i32 %tmp585, i32 0		; <i32> [#uses=1]
-	%tmp591 = load %struct.RETURN** %tmp63, align 4		; <%struct.RETURN*> [#uses=1]
-	%tmp592 = getelementptr %struct.RETURN* %tmp591, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp593 = load i32* %tmp592, align 4		; <i32> [#uses=1]
-	%tmp597 = load i8** %tmp315, align 4		; <i8*> [#uses=1]
-	%tmp600621 = trunc i64 %new_item.0.0 to i32		; <i32> [#uses=1]
-	%tmp600622 = inttoptr i32 %tmp600621 to i8*		; <i8*> [#uses=1]
-	%tmp601 = sub i32 %before, %tmp593		; <i32> [#uses=1]
-	%tmp602 = getelementptr i8* %tmp597, i32 %tmp601		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32( i8* %tmp602, i8* %tmp600622, i32 %max587, i32 1 )
-	%tmp606 = load i32* %tmp71, align 4		; <i32> [#uses=1]
-	%tmp613 = add i32 %tmp606, %max154		; <i32> [#uses=1]
-	store i32 %tmp613, i32* %tmp71, align 4
-	ret void
-}
diff --git a/test/CodeGen/Generic/invalid-memcpy.ll b/test/CodeGen/Generic/invalid-memcpy.ll
index 8448565..2dfa28b 100644
--- a/test/CodeGen/Generic/invalid-memcpy.ll
+++ b/test/CodeGen/Generic/invalid-memcpy.ll
@@ -4,16 +4,14 @@
 ; greater than the alignment guaranteed for Qux or C.0.1173), but it
 ; should compile, not crash the code generator.
 
-@C.0.1173 = external constant [33 x i8]         ; <[33 x i8]*> [#uses=1]
+@C.0.1173 = external constant [33 x i8]
 
 define void @Bork() {
 entry:
-        %Qux = alloca [33 x i8]         ; <[33 x i8]*> [#uses=1]
-        %Qux1 = bitcast [33 x i8]* %Qux to i8*          ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i64( i8* %Qux1, i8* getelementptr ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8 )
-        ret void
+  %Qux = alloca [33 x i8]
+  %Qux1 = bitcast [33 x i8]* %Qux to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Qux1, i8* getelementptr inbounds ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8, i1 false)
+  ret void
 }
 
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
deleted file mode 100644
index 83f3f6f..0000000
--- a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=ppc32
-; PR2986
-@argc = external global i32		; <i32*> [#uses=1]
-@buffer = external global [32 x i8], align 4		; <[32 x i8]*> [#uses=1]
-
-define void @test1() nounwind noinline {
-entry:
-	%0 = load i32* @argc, align 4		; <i32> [#uses=1]
-	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
-	tail call void @llvm.memset.i32(i8* getelementptr ([32 x i8]* @buffer, i32 0, i32 0), i8 %1, i32 17, i32 4)
-	unreachable
-}
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll b/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
deleted file mode 100644
index 54f4b2e..0000000
--- a/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
+++ /dev/null
@@ -1,155 +0,0 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin8
-
-%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-%struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
-%struct.__sFILEX = type opaque
-%struct.__sbuf = type { i8*, i32 }
-%struct.gcov_ctr_info = type { i32, i64*, void (i64*, i32)* }
-%struct.gcov_ctr_summary = type { i32, i32, i64, i64, i64 }
-%struct.gcov_fn_info = type { i32, i32, [0 x i32] }
-%struct.gcov_info = type { i32, %struct.gcov_info*, i32, i8*, i32, %struct.gcov_fn_info*, i32, [0 x %struct.gcov_ctr_info] }
-%struct.gcov_summary = type { i32, [1 x %struct.gcov_ctr_summary] }
-
-@__gcov_var = external global %struct.__gcov_var  ; <%struct.__gcov_var*> [#uses=1]
-@__sF = external global [0 x %struct.FILE]        ; <[0 x %struct.FILE]*> [#uses=1]
-@.str = external constant [56 x i8], align 4      ; <[56 x i8]*> [#uses=1]
-@gcov_list = external global %struct.gcov_info*   ; <%struct.gcov_info**> [#uses=1]
-@.str7 = external constant [35 x i8], align 4     ; <[35 x i8]*> [#uses=1]
-@.str8 = external constant [9 x i8], align 4      ; <[9 x i8]*> [#uses=1]
-@.str9 = external constant [10 x i8], align 4     ; <[10 x i8]*> [#uses=1]
-@.str10 = external constant [36 x i8], align 4    ; <[36 x i8]*> [#uses=1]
-
-declare i32 @"\01_fprintf$LDBL128"(%struct.FILE*, i8*, ...) nounwind
-
-define void @gcov_exit() nounwind {
-entry:
-  %gi_ptr.0357 = load %struct.gcov_info** @gcov_list, align 4 ; <%struct.gcov_info*> [#uses=1]
-  %0 = alloca i8, i32 undef, align 1              ; <i8*> [#uses=3]
-  br i1 undef, label %return, label %bb.nph341
-
-bb.nph341:                                        ; preds = %entry
-  %object27 = bitcast %struct.gcov_summary* undef to i8* ; <i8*> [#uses=1]
-  br label %bb25
-
-bb25:                                             ; preds = %read_fatal, %bb.nph341
-  %gi_ptr.1329 = phi %struct.gcov_info* [ %gi_ptr.0357, %bb.nph341 ], [ undef, %read_fatal ] ; <%struct.gcov_info*> [#uses=1]
-  call void @llvm.memset.i32(i8* %object27, i8 0, i32 36, i32 8)
-  br i1 undef, label %bb49.1, label %bb48
-
-bb48:                                             ; preds = %bb25
-  br label %bb49.1
-
-bb51:                                             ; preds = %bb48.4, %bb49.3
-  switch i32 undef, label %bb58 [
-    i32 0, label %rewrite
-    i32 1734567009, label %bb59
-  ]
-
-bb58:                                             ; preds = %bb51
-  %1 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([35 x i8]* @.str7, i32 0, i32 0), i8* %0) nounwind ; <i32> [#uses=0]
-  br label %read_fatal
-
-bb59:                                             ; preds = %bb51
-  br i1 undef, label %bb60, label %bb3.i156
-
-bb3.i156:                                         ; preds = %bb59
-  store i8 52, i8* undef, align 1
-  store i8 42, i8* undef, align 1
-  %2 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([56 x i8]* @.str, i32 0, i32 0), i8* %0, i8* undef, i8* undef) nounwind ; <i32> [#uses=0]
-  br label %read_fatal
-
-bb60:                                             ; preds = %bb59
-  br i1 undef, label %bb78.preheader, label %rewrite
-
-bb78.preheader:                                   ; preds = %bb60
-  br i1 undef, label %bb62, label %bb80
-
-bb62:                                             ; preds = %bb78.preheader
-  br i1 undef, label %bb64, label %read_mismatch
-
-bb64:                                             ; preds = %bb62
-  br i1 undef, label %bb65, label %read_mismatch
-
-bb65:                                             ; preds = %bb64
-  br i1 undef, label %bb75, label %read_mismatch
-
-read_mismatch:                                    ; preds = %bb98, %bb119.preheader, %bb72, %bb71, %bb65, %bb64, %bb62
-  %3 = icmp eq i32 undef, -1                      ; <i1> [#uses=1]
-  %iftmp.11.0 = select i1 %3, i8* getelementptr inbounds ([10 x i8]* @.str9, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str8, i32 0, i32 0) ; <i8*> [#uses=1]
-  %4 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([36 x i8]* @.str10, i32 0, i32 0), i8* %0, i8* %iftmp.11.0) nounwind ; <i32> [#uses=0]
-  br label %read_fatal
-
-bb71:                                             ; preds = %bb75
-  %5 = load i32* undef, align 4                   ; <i32> [#uses=1]
-  %6 = getelementptr inbounds %struct.gcov_info* %gi_ptr.1329, i32 0, i32 7, i32 undef, i32 2 ; <void (i64*, i32)**> [#uses=1]
-  %7 = load void (i64*, i32)** %6, align 4        ; <void (i64*, i32)*> [#uses=1]
-  %8 = call i32 @__gcov_read_unsigned() nounwind  ; <i32> [#uses=1]
-  %9 = call i32 @__gcov_read_unsigned() nounwind  ; <i32> [#uses=1]
-  %10 = icmp eq i32 %tmp386, %8                   ; <i1> [#uses=1]
-  br i1 %10, label %bb72, label %read_mismatch
-
-bb72:                                             ; preds = %bb71
-  %11 = icmp eq i32 undef, %9                     ; <i1> [#uses=1]
-  br i1 %11, label %bb73, label %read_mismatch
-
-bb73:                                             ; preds = %bb72
-  call void %7(i64* null, i32 %5) nounwind
-  unreachable
-
-bb74:                                             ; preds = %bb75
-  %12 = add i32 %13, 1                            ; <i32> [#uses=1]
-  br label %bb75
-
-bb75:                                             ; preds = %bb74, %bb65
-  %13 = phi i32 [ %12, %bb74 ], [ 0, %bb65 ]      ; <i32> [#uses=2]
-  %tmp386 = add i32 0, 27328512                   ; <i32> [#uses=1]
-  %14 = shl i32 1, %13                            ; <i32> [#uses=1]
-  %15 = load i32* undef, align 4                  ; <i32> [#uses=1]
-  %16 = and i32 %15, %14                          ; <i32> [#uses=1]
-  %17 = icmp eq i32 %16, 0                        ; <i1> [#uses=1]
-  br i1 %17, label %bb74, label %bb71
-
-bb80:                                             ; preds = %bb78.preheader
-  unreachable
-
-read_fatal:                                       ; preds = %read_mismatch, %bb3.i156, %bb58
-  br i1 undef, label %return, label %bb25
-
-rewrite:                                          ; preds = %bb60, %bb51
-  store i32 -1, i32* getelementptr inbounds (%struct.__gcov_var* @__gcov_var, i32 0, i32 6), align 4
-  br i1 undef, label %bb94, label %bb119.preheader
-
-bb94:                                             ; preds = %rewrite
-  unreachable
-
-bb119.preheader:                                  ; preds = %rewrite
-  br i1 undef, label %read_mismatch, label %bb98
-
-bb98:                                             ; preds = %bb119.preheader
-  br label %read_mismatch
-
-return:                                           ; preds = %read_fatal, %entry
-  ret void
-
-bb49.1:                                           ; preds = %bb48, %bb25
-  br i1 undef, label %bb49.2, label %bb48.2
-
-bb49.2:                                           ; preds = %bb48.2, %bb49.1
-  br i1 undef, label %bb49.3, label %bb48.3
-
-bb48.2:                                           ; preds = %bb49.1
-  br label %bb49.2
-
-bb49.3:                                           ; preds = %bb48.3, %bb49.2
-  br i1 undef, label %bb51, label %bb48.4
-
-bb48.3:                                           ; preds = %bb49.2
-  br label %bb49.3
-
-bb48.4:                                           ; preds = %bb49.3
-  br label %bb51
-}
-
-declare i32 @__gcov_read_unsigned() nounwind
-
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index ec4e42d..a2cf170 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,21 +1,23 @@
 ; RUN: llc < %s -march=ppc32 | grep lwarx  | count 3
 ; RUN: llc < %s -march=ppc32 | grep stwcx. | count 4
 
-define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind  {
-	%tmp = call i32 @llvm.atomic.load.add.i32( i32* %mem, i32 %val )
-	ret i32 %tmp
+define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
+  %tmp = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %mem, i32 %val)
+  ret i32 %tmp
 }
 
-define i32 @exchange_and_cmp(i32* %mem) nounwind  {
-       	%tmp = call i32 @llvm.atomic.cmp.swap.i32( i32* %mem, i32 0, i32 1 )
-	ret i32 %tmp
+define i32 @exchange_and_cmp(i32* %mem) nounwind {
+  %tmp = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %mem, i32 0, i32 1)
+  ret i32 %tmp
 }
 
-define i32 @exchange(i32* %mem, i32 %val) nounwind  {
-	%tmp = call i32 @llvm.atomic.swap.i32( i32* %mem, i32 1 )
-	ret i32 %tmp
+define i32 @exchange(i32* %mem, i32 %val) nounwind {
+  %tmp = call i32 @llvm.atomic.swap.i32.p0i32(i32* %mem, i32 1)
+  ret i32 %tmp
 }
 
-declare i32 @llvm.atomic.load.add.i32(i32*, i32) nounwind 
-declare i32 @llvm.atomic.cmp.swap.i32(i32*, i32, i32) nounwind 
-declare i32 @llvm.atomic.swap.i32(i32*, i32) nounwind 
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* nocapture, i32, i32) nounwind
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32* nocapture, i32) nounwind
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 6d9daef..0fa2a29 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,21 +1,23 @@
 ; RUN: llc < %s -march=ppc64 | grep ldarx  | count 3
 ; RUN: llc < %s -march=ppc64 | grep stdcx. | count 4
 
-define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind  {
-	%tmp = call i64 @llvm.atomic.load.add.i64( i64* %mem, i64 %val )
-	ret i64 %tmp
+define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
+  %tmp = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %mem, i64 %val)
+  ret i64 %tmp
 }
 
-define i64 @exchange_and_cmp(i64* %mem) nounwind  {
-       	%tmp = call i64 @llvm.atomic.cmp.swap.i64( i64* %mem, i64 0, i64 1 )
-	ret i64 %tmp
+define i64 @exchange_and_cmp(i64* %mem) nounwind {
+  %tmp = call i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* %mem, i64 0, i64 1)
+  ret i64 %tmp
 }
 
-define i64 @exchange(i64* %mem, i64 %val) nounwind  {
-	%tmp = call i64 @llvm.atomic.swap.i64( i64* %mem, i64 1 )
-	ret i64 %tmp
+define i64 @exchange(i64* %mem, i64 %val) nounwind {
+  %tmp = call i64 @llvm.atomic.swap.i64.p0i64(i64* %mem, i64 1)
+  ret i64 %tmp
 }
 
-declare i64 @llvm.atomic.load.add.i64(i64*, i64) nounwind 
-declare i64 @llvm.atomic.cmp.swap.i64(i64*, i64, i64) nounwind 
-declare i64 @llvm.atomic.swap.i64(i64*, i64) nounwind 
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* nocapture, i64, i64) nounwind
+
+declare i64 @llvm.atomic.swap.i64.p0i64(i64* nocapture, i64) nounwind
diff --git a/test/CodeGen/PowerPC/invalid-memcpy.ll b/test/CodeGen/PowerPC/invalid-memcpy.ll
deleted file mode 100644
index 3b1f306..0000000
--- a/test/CodeGen/PowerPC/invalid-memcpy.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=ppc32
-; RUN: llc < %s -march=ppc64
-
-; This testcase is invalid (the alignment specified for memcpy is 
-; greater than the alignment guaranteed for Qux or C.0.1173, but it
-; should compile, not crash the code generator.
-
-@C.0.1173 = external constant [33 x i8]         ; <[33 x i8]*> [#uses=1]
-
-define void @Bork() {
-entry:
-        %Qux = alloca [33 x i8]         ; <[33 x i8]*> [#uses=1]
-        %Qux1 = bitcast [33 x i8]* %Qux to i8*          ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i64( i8* %Qux1, i8* getelementptr ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8 )
-        ret void
-}
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index cd35be6..f3f0834 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -60,7 +60,7 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
 	%tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
 	%tmp6.len = call i32 @strlen( i8* %tmp6 )
 	%tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
-	call void @llvm.memcpy.i32( i8* %tmp6.indexed, i8* getelementptr ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1 )
+	call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
 	%tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
 	call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
 	ret void
@@ -72,6 +72,6 @@ declare i8* @strcat(i8*, i8*)
 
 declare fastcc void @comment_add(%struct.comment*, i8*)
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 declare i8* @strcpy(i8*, i8*)
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
index bfea003..779e100 100644
--- a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | not grep fcpys
 ; rdar://7117307
 
@@ -13,7 +12,7 @@ entry:
 	br i1 undef, label %bb, label %bb6.preheader
 
 bb6.preheader:		; preds = %entry
-	call void @llvm.memcpy.i32(i8* undef, i8* undef, i32 12, i32 4)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 12, i32 4, i1 false)
 	br i1 undef, label %bb15, label %bb13
 
 bb:		; preds = %entry
@@ -31,4 +30,4 @@ bb15:		; preds = %bb13, %bb6.preheader
 	ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.ll b/test/CodeGen/X86/2004-02-12-Memcpy.ll
deleted file mode 100644
index f15a1b4..0000000
--- a/test/CodeGen/X86/2004-02-12-Memcpy.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
-
-@A = global [32 x i32] zeroinitializer
-@B = global [32 x i32] zeroinitializer
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define void @main() nounwind {
-  ; dword copy
-  call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
-                           i8* bitcast ([32 x i32]* @B to i8*),
-                           i32 128, i32 4 )
-
-  ; word copy
-  call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
-                           i8* bitcast ([32 x i32]* @B to i8*),
-                           i32 128, i32 2 )
-
-  ; byte copy
-  call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
-                           i8* bitcast ([32 x i32]* @B to i8*),
-                            i32 128, i32 1 )
-
-  ret void
-}
diff --git a/test/CodeGen/X86/2006-11-28-Memcpy.ll b/test/CodeGen/X86/2006-11-28-Memcpy.ll
deleted file mode 100644
index 8c1573f..0000000
--- a/test/CodeGen/X86/2006-11-28-Memcpy.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; PR1022, PR1023
-; RUN: llc < %s -march=x86 | grep -- -573785174 | count 2
-; RUN: llc < %s -march=x86 | grep -E {movl	_?bytes2} | count 1
-
-@fmt = constant [4 x i8] c"%x\0A\00"            ; <[4 x i8]*> [#uses=2]
-@bytes = constant [4 x i8] c"\AA\BB\CC\DD"              ; <[4 x i8]*> [#uses=1]
-@bytes2 = global [4 x i8] c"\AA\BB\CC\DD"               ; <[4 x i8]*> [#uses=1]
-
-define i32 @test1() nounwind {
-        %y = alloca i32         ; <i32*> [#uses=2]
-        %c = bitcast i32* %y to i8*             ; <i8*> [#uses=1]
-        %z = getelementptr [4 x i8]* @bytes, i32 0, i32 0               ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* %c, i8* %z, i32 4, i32 1 )
-        %r = load i32* %y               ; <i32> [#uses=1]
-        %t = bitcast [4 x i8]* @fmt to i8*              ; <i8*> [#uses=1]
-        %tmp = call i32 (i8*, ...)* @printf( i8* %t, i32 %r )           ; <i32> [#uses=0]
-        ret i32 0
-}
-
-define void @test2() nounwind {
-        %y = alloca i32         ; <i32*> [#uses=2]
-        %c = bitcast i32* %y to i8*             ; <i8*> [#uses=1]
-        %z = getelementptr [4 x i8]* @bytes2, i32 0, i32 0              ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* %c, i8* %z, i32 4, i32 1 )
-        %r = load i32* %y               ; <i32> [#uses=1]
-        %t = bitcast [4 x i8]* @fmt to i8*              ; <i8*> [#uses=1]
-        %tmp = call i32 (i8*, ...)* @printf( i8* %t, i32 %r )           ; <i32> [#uses=0]
-        ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare i32 @printf(i8*, ...)
-
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index 0f49d2e..d1fc70d 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -3,35 +3,35 @@
 ; CHECK: .cfi_personality 0, __gnat_eh_personality
 ; CHECK: .cfi_lsda 0, .Lexception0
 
-@error = external global i8		; <i8*> [#uses=2]
+@error = external global i8
 
 define void @_ada_x() {
 entry:
-	invoke void @raise( )
-			to label %eh_then unwind label %unwind
-
-unwind:		; preds = %entry
-	%eh_ptr = tail call i8* @llvm.eh.exception( )		; <i8*> [#uses=2]
-	%eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error )		; <i32> [#uses=1]
-	%eh_typeid = tail call i32 @llvm.eh.typeid.for.i32( i8* @error )		; <i32> [#uses=1]
-	%tmp2 = icmp eq i32 %eh_select, %eh_typeid		; <i1> [#uses=1]
-	br i1 %tmp2, label %eh_then, label %Unwind
-
-eh_then:		; preds = %unwind, %entry
-	ret void
-
-Unwind:		; preds = %unwind
-	tail call i32 (...)* @_Unwind_Resume( i8* %eh_ptr )		; <i32>:0 [#uses=0]
-	unreachable
+  invoke void @raise()
+          to label %eh_then unwind label %unwind
+
+unwind:                                           ; preds = %entry
+  %eh_ptr = tail call i8* @llvm.eh.exception()
+  %eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error)
+  %eh_typeid = tail call i32 @llvm.eh.typeid.for(i8* @error)
+  %tmp2 = icmp eq i32 %eh_select, %eh_typeid
+  br i1 %tmp2, label %eh_then, label %Unwind
+
+eh_then:                                          ; preds = %unwind, %entry
+  ret void
+
+Unwind:                                           ; preds = %unwind
+  %0 = tail call i32 (...)* @_Unwind_Resume(i8* %eh_ptr)
+  unreachable
 }
 
 declare void @raise()
 
-declare i8* @llvm.eh.exception()
+declare i8* @llvm.eh.exception() nounwind readonly
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...)
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
 
-declare i32 @llvm.eh.typeid.for.i32(i8*)
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
 declare i32 @__gnat_eh_personality(...)
 
diff --git a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
deleted file mode 100644
index 36a97ef..0000000
--- a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
+++ /dev/null
@@ -1,129 +0,0 @@
-; PR1495
-; RUN: llc < %s -march=x86
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-pc-linux-gnu"
-	%struct.AVRational = type { i32, i32 }
-	%struct.FFTComplex = type { float, float }
-	%struct.FFTContext = type { i32, i32, i16*, %struct.FFTComplex*, %struct.FFTComplex*, void (%struct.FFTContext*, %struct.FFTComplex*)*, void (%struct.MDCTContext*, float*, float*, float*)* }
-	%struct.MDCTContext = type { i32, i32, float*, float*, %struct.FFTContext }
-	%struct.Minima = type { i32, i32, i32, i32 }
-	%struct.codebook_t = type { i32, i8*, i32*, i32, float, float, i32, i32, i32*, float*, float* }
-	%struct.floor_class_t = type { i32, i32, i32, i32* }
-	%struct.floor_t = type { i32, i32*, i32, %struct.floor_class_t*, i32, i32, i32, %struct.Minima* }
-	%struct.mapping_t = type { i32, i32*, i32*, i32*, i32, i32*, i32* }
-	%struct.residue_t = type { i32, i32, i32, i32, i32, i32, [8 x i8]*, [2 x float]* }
-	%struct.venc_context_t = type { i32, i32, [2 x i32], [2 x %struct.MDCTContext], [2 x float*], i32, float*, float*, float*, float*, float, i32, %struct.codebook_t*, i32, %struct.floor_t*, i32, %struct.residue_t*, i32, %struct.mapping_t*, i32, %struct.AVRational* }
-
-define fastcc i32 @put_main_header(%struct.venc_context_t* %venc, i8** %out) {
-entry:
-	br i1 false, label %bb1820, label %bb288.bb148_crit_edge
-
-bb288.bb148_crit_edge:		; preds = %entry
-	ret i32 0
-
-cond_next1712:		; preds = %bb1820.bb1680_crit_edge
-	ret i32 0
-
-bb1817:		; preds = %bb1820.bb1680_crit_edge
-	br label %bb1820
-
-bb1820:		; preds = %bb1817, %entry
-	%pb.1.50 = phi i32 [ %tmp1693, %bb1817 ], [ 8, %entry ]		; <i32> [#uses=3]
-	br i1 false, label %bb2093, label %bb1820.bb1680_crit_edge
-
-bb1820.bb1680_crit_edge:		; preds = %bb1820
-	%tmp1693 = add i32 %pb.1.50, 8		; <i32> [#uses=2]
-	%tmp1702 = icmp slt i32 %tmp1693, 0		; <i1> [#uses=1]
-	br i1 %tmp1702, label %cond_next1712, label %bb1817
-
-bb2093:		; preds = %bb1820
-	%tmp2102 = add i32 %pb.1.50, 65		; <i32> [#uses=0]
-	%tmp2236 = add i32 %pb.1.50, 72		; <i32> [#uses=1]
-	%tmp2237 = sdiv i32 %tmp2236, 8		; <i32> [#uses=2]
-	br i1 false, label %bb2543, label %bb2536.bb2396_crit_edge
-
-bb2536.bb2396_crit_edge:		; preds = %bb2093
-	ret i32 0
-
-bb2543:		; preds = %bb2093
-	br i1 false, label %cond_next2576, label %bb2690
-
-cond_next2576:		; preds = %bb2543
-	ret i32 0
-
-bb2682:		; preds = %bb2690
-	ret i32 0
-
-bb2690:		; preds = %bb2543
-	br i1 false, label %bb2682, label %bb2698
-
-bb2698:		; preds = %bb2690
-	br i1 false, label %cond_next2726, label %bb2831
-
-cond_next2726:		; preds = %bb2698
-	ret i32 0
-
-bb2831:		; preds = %bb2698
-	br i1 false, label %cond_next2859, label %bb2964
-
-cond_next2859:		; preds = %bb2831
-	br i1 false, label %bb2943, label %cond_true2866
-
-cond_true2866:		; preds = %cond_next2859
-	br i1 false, label %cond_true2874, label %cond_false2897
-
-cond_true2874:		; preds = %cond_true2866
-	ret i32 0
-
-cond_false2897:		; preds = %cond_true2866
-	ret i32 0
-
-bb2943:		; preds = %cond_next2859
-	ret i32 0
-
-bb2964:		; preds = %bb2831
-	br i1 false, label %cond_next2997, label %bb4589
-
-cond_next2997:		; preds = %bb2964
-	ret i32 0
-
-bb3103:		; preds = %bb4589
-	ret i32 0
-
-bb4589:		; preds = %bb2964
-	br i1 false, label %bb3103, label %bb4597
-
-bb4597:		; preds = %bb4589
-	br i1 false, label %cond_next4630, label %bb4744
-
-cond_next4630:		; preds = %bb4597
-	br i1 false, label %bb4744, label %cond_true4724
-
-cond_true4724:		; preds = %cond_next4630
-	br i1 false, label %bb4736, label %bb7531
-
-bb4736:		; preds = %cond_true4724
-	ret i32 0
-
-bb4744:		; preds = %cond_next4630, %bb4597
-	ret i32 0
-
-bb7531:		; preds = %cond_true4724
-	%v_addr.023.0.i6 = add i32 %tmp2237, -255		; <i32> [#uses=1]
-	br label %bb.i14
-
-bb.i14:		; preds = %bb.i14, %bb7531
-	%n.021.0.i8 = phi i32 [ 0, %bb7531 ], [ %indvar.next, %bb.i14 ]		; <i32> [#uses=2]
-	%tmp..i9 = mul i32 %n.021.0.i8, -255		; <i32> [#uses=1]
-	%tmp5.i11 = add i32 %v_addr.023.0.i6, %tmp..i9		; <i32> [#uses=1]
-	%tmp10.i12 = icmp ugt i32 %tmp5.i11, 254		; <i1> [#uses=1]
-	%indvar.next = add i32 %n.021.0.i8, 1		; <i32> [#uses=1]
-	br i1 %tmp10.i12, label %bb.i14, label %bb12.loopexit.i18
-
-bb12.loopexit.i18:		; preds = %bb.i14
-	call void @llvm.memcpy.i32( i8* null, i8* null, i32 %tmp2237, i32 1 )
-	ret i32 0
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
deleted file mode 100644
index 236b7cd..0000000
--- a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
-
-	%struct.argument_t = type { i8*, %struct.argument_t*, i32, %struct.ipc_type_t*, i32, void (...)*, void (...)*, void (...)*, void (...)*, void (...)*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, %struct.routine*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, i32, i32, i32, i32, i32, i32 }
-	%struct.ipc_type_t = type { i8*, %struct.ipc_type_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, %struct.ipc_type_t*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
-	%struct.routine = type opaque
-@"\01LC" = external constant [11 x i8]		; <[11 x i8]*> [#uses=1]
-
-define i8* @InArgMsgField(%struct.argument_t* %arg, i8* %str) nounwind  {
-entry:
-	%who = alloca [20 x i8]		; <[20 x i8]*> [#uses=1]
-	%who1 = getelementptr [20 x i8]* %who, i32 0, i32 0		; <i8*> [#uses=2]
-	call void @llvm.memset.i32( i8* %who1, i8 0, i32 20, i32 1 )
-	call void @llvm.memcpy.i32( i8* %who1, i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i32 11, i32 1 )
-	unreachable
-}
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind 
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index 3d0766c..a57f716 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -11,12 +11,12 @@ target triple = "i386-pc-linux-gnu"
   
 define void @foo(i32 %t) nounwind {
   %tmp1210 = alloca i8, i32 32, align 4
-  call void @llvm.memset.i64(i8* %tmp1210, i8 0, i64 32, i32 4)
-  
+  call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
   %x = alloca i8, i32 %t
   call void @dummy(i8* %x)
   ret void
 }
 
-declare void @dummy(i8* %x)
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
+declare void @dummy(i8*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
index 0583ef1..8406c4a 100644
--- a/test/CodeGen/X86/2009-01-25-NoSSE.ll
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -3,18 +3,18 @@
 target datalayout =
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
-        %struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 }
+
+%struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 }
 
 define void @foo() nounwind {
 entry:
-        %termios = alloca %struct.ktermios, align 8
-        %termios1 = bitcast %struct.ktermios* %termios to i8*
-        call void @llvm.memset.i64(i8* %termios1, i8 0, i64 44, i32 8)
-        call void @bar(%struct.ktermios* %termios) nounwind
-        ret void
+  %termios = alloca %struct.ktermios, align 8
+  %termios1 = bitcast %struct.ktermios* %termios to i8*
+  call void @llvm.memset.p0i8.i64(i8* %termios1, i8 0, i64 44, i32 8, i1 false)
+  call void @bar(%struct.ktermios* %termios) nounwind
+  ret void
 }
 
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
 declare void @bar(%struct.ktermios*)
 
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
deleted file mode 100644
index 0a2fcdb..0000000
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
-; PR4076
-
-	type { i8, i8, i8 }		; type %0
-	type { i32, i8** }		; type %1
-	type { %3* }		; type %2
-	type { %4 }		; type %3
-	type { %5 }		; type %4
-	type { %6, i32, %7 }		; type %5
-	type { i8* }		; type %6
-	type { i32, [12 x i8] }		; type %7
-	type { %9 }		; type %8
-	type { %10, %11*, i8 }		; type %9
-	type { %11* }		; type %10
-	type { i32, %6, i8*, %12, %13*, i8, i32, %28, %29, i32, %30, i32, i32, i32, i8*, i8*, i8, i8 }		; type %11
-	type { %13* }		; type %12
-	type { %14, i32, %13*, %21 }		; type %13
-	type { %15, %16 }		; type %14
-	type { i32 (...)** }		; type %15
-	type { %17, i8* (i32)*, void (i8*)*, i8 }		; type %16
-	type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %18 }		; type %17
-	type { %19* }		; type %18
-	type { i32, %20**, i32, %20**, i8** }		; type %19
-	type { i32 (...)**, i32 }		; type %20
-	type { %22, %25*, i8, i8, %17*, %26*, %27*, %27* }		; type %21
-	type { i32 (...)**, i32, i32, i32, i32, i32, %23*, %24, [8 x %24], i32, %24*, %18 }		; type %22
-	type { %23*, void (i32, %22*, i32)*, i32, i32 }		; type %23
-	type { i8*, i32 }		; type %24
-	type { i32 (...)**, %21 }		; type %25
-	type { %20, i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }		; type %26
-	type { %20 }		; type %27
-	type { void (%9*)*, i32 }		; type %28
-	type { %15* }		; type %29
-	type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }		; type %30
-@AtomicOps_Internalx86CPUFeatures = external global %0		; <%0*> [#uses=1]
-internal constant [19 x i8] c"xxxxxxxxxxxxxxxxxx\00"		; <[19 x i8]*>:0 [#uses=1]
-internal constant [47 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00"		; <[47 x i8]*>:1 [#uses=1]
-
-define i8** @func6(i8 zeroext, i32, i32, %1*) nounwind {
-; <label>:4
-	%5 = alloca i32, align 4		; <i32*> [#uses=2]
-	%6 = alloca i32, align 4		; <i32*> [#uses=2]
-	%7 = alloca %2, align 8		; <%2*> [#uses=3]
-	%8 = alloca %8, align 8		; <%8*> [#uses=2]
-	br label %17
-
-; <label>:9		; preds = %17
-	%10 = getelementptr %1* %3, i32 %19, i32 0		; <i32*> [#uses=1]
-	%11 = load i32* %10, align 4		; <i32> [#uses=1]
-	%12 = icmp eq i32 %11, %2		; <i1> [#uses=1]
-	br i1 %12, label %13, label %16
-
-; <label>:13		; preds = %9
-	%14 = getelementptr %1* %3, i32 %19, i32 1		; <i8***> [#uses=1]
-	%15 = load i8*** %14, align 4		; <i8**> [#uses=1]
-	ret i8** %15
-
-; <label>:16		; preds = %9
-	%indvar.next13 = add i32 %18, 1		; <i32> [#uses=1]
-	br label %17
-
-; <label>:17		; preds = %16, %4
-	%18 = phi i32 [ 0, %4 ], [ %indvar.next13, %16 ]		; <i32> [#uses=2]
-	%19 = add i32 %18, %1		; <i32> [#uses=3]
-	%20 = icmp sgt i32 %19, 3		; <i1> [#uses=1]
-	br i1 %20, label %21, label %9
-
-; <label>:21		; preds = %17
-	call void @func5()
-	%22 = getelementptr %1* %3, i32 0, i32 0		; <i32*> [#uses=1]
-	%23 = load i32* %22, align 4		; <i32> [#uses=1]
-	%24 = icmp eq i32 %23, 0		; <i1> [#uses=1]
-	br i1 %24, label %._crit_edge, label %._crit_edge1
-
-._crit_edge1:		; preds = %._crit_edge1, %21
-	%25 = phi i32 [ 0, %21 ], [ %26, %._crit_edge1 ]		; <i32> [#uses=1]
-	%26 = add i32 %25, 1		; <i32> [#uses=4]
-	%27 = getelementptr %1* %3, i32 %26, i32 0		; <i32*> [#uses=1]
-	%28 = load i32* %27, align 4		; <i32> [#uses=1]
-	%29 = icmp ne i32 %28, 0		; <i1> [#uses=1]
-	%30 = icmp ne i32 %26, 4		; <i1> [#uses=1]
-	%31 = and i1 %29, %30		; <i1> [#uses=1]
-	br i1 %31, label %._crit_edge1, label %._crit_edge
-
-._crit_edge:		; preds = %._crit_edge1, %21
-	%32 = phi i32 [ 0, %21 ], [ %26, %._crit_edge1 ]		; <i32> [#uses=3]
-	%33 = call i8* @pthread_getspecific(i32 0) nounwind		; <i8*> [#uses=2]
-	%34 = icmp ne i8* %33, null		; <i1> [#uses=1]
-	%35 = icmp eq i8 %0, 0		; <i1> [#uses=1]
-	%36 = or i1 %34, %35		; <i1> [#uses=1]
-	br i1 %36, label %._crit_edge4, label %37
-
-; <label>:37		; preds = %._crit_edge
-	%38 = call i8* @func2(i32 2048)		; <i8*> [#uses=4]
-	call void @llvm.memset.i32(i8* %38, i8 0, i32 2048, i32 4)
-	%39 = call i32 @pthread_setspecific(i32 0, i8* %38) nounwind		; <i32> [#uses=2]
-	store i32 %39, i32* %5
-	store i32 0, i32* %6
-	%40 = icmp eq i32 %39, 0		; <i1> [#uses=1]
-	br i1 %40, label %41, label %43
-
-; <label>:41		; preds = %37
-	%42 = getelementptr %2* %7, i32 0, i32 0		; <%3**> [#uses=1]
-	store %3* null, %3** %42, align 8
-	br label %._crit_edge4
-
-; <label>:43		; preds = %37
-	%44 = call %3* @func1(i32* %5, i32* %6, i8* getelementptr ([47 x i8]* @1, i32 0, i32 0))		; <%3*> [#uses=2]
-	%45 = getelementptr %2* %7, i32 0, i32 0		; <%3**> [#uses=1]
-	store %3* %44, %3** %45, align 8
-	%46 = icmp eq %3* %44, null		; <i1> [#uses=1]
-	br i1 %46, label %._crit_edge4, label %47
-
-; <label>:47		; preds = %43
-	call void @func4(%8* %8, i8* getelementptr ([19 x i8]* @0, i32 0, i32 0), i32 165, %2* %7)
-	call void @func3(%8* %8) noreturn
-	unreachable
-
-._crit_edge4:		; preds = %43, %41, %._crit_edge
-	%48 = phi i8* [ %38, %41 ], [ %33, %._crit_edge ], [ %38, %43 ]		; <i8*> [#uses=2]
-	%49 = bitcast i8* %48 to i8**		; <i8**> [#uses=3]
-	%50 = icmp ne i8* %48, null		; <i1> [#uses=1]
-	%51 = icmp slt i32 %32, 4		; <i1> [#uses=1]
-	%52 = and i1 %50, %51		; <i1> [#uses=1]
-	br i1 %52, label %53, label %._crit_edge6
-
-; <label>:53		; preds = %._crit_edge4
-	%54 = getelementptr %1* %3, i32 %32, i32 0		; <i32*> [#uses=1]
-	%55 = call i32 asm sideeffect "lock; cmpxchgl $1,$2", "={ax},q,*m,0,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %2, i32* %54, i32 0) nounwind		; <i32> [#uses=1]
-	%56 = load i8* getelementptr (%0* @AtomicOps_Internalx86CPUFeatures, i32 0, i32 0), align 8		; <i8> [#uses=1]
-	%57 = icmp eq i8 %56, 0		; <i1> [#uses=1]
-	br i1 %57, label %._crit_edge7, label %58
-
-; <label>:58		; preds = %53
-	call void asm sideeffect "lfence", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind
-	br label %._crit_edge7
-
-._crit_edge7:		; preds = %58, %53
-	%59 = icmp eq i32 %55, 0		; <i1> [#uses=1]
-	br i1 %59, label %60, label %._crit_edge6
-
-._crit_edge6:		; preds = %._crit_edge7, %._crit_edge4
-	ret i8** %49
-
-; <label>:60		; preds = %._crit_edge7
-	%61 = getelementptr %1* %3, i32 %32, i32 1		; <i8***> [#uses=1]
-	store i8** %49, i8*** %61, align 4
-	ret i8** %49
-}
-
-declare %3* @func1(i32* nocapture, i32* nocapture, i8*)
-
-declare void @func5()
-
-declare void @func4(%8*, i8*, i32, %2*)
-
-declare void @func3(%8*) noreturn
-
-declare i8* @pthread_getspecific(i32) nounwind
-
-declare i8* @func2(i32)
-
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
-
-declare i32 @pthread_setspecific(i32, i8*) nounwind
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
index d33f93e..94075e7 100644
--- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -16,7 +16,7 @@ bb1:
 ; CHECK: LBB0_1:
 ; CHECK: movaps %xmm0, (%rsp)
   %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
-  call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false)
   %tmp3 = add i32 %tmp2, 1
   %tmp4 = icmp eq i32 %tmp3, %count
   br i1 %tmp4, label %bb2, label %bb1
@@ -25,4 +25,4 @@ bb2:
   ret void
 }
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
index a9573cf..3099526 100644
--- a/test/CodeGen/X86/darwin-bzero.ll
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep __bzero
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @foo(i8* %p, i32 %len) {
-  call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1)
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 %len, i32 1, i1 false)
   ret void
 }
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 17cd8e8..eae2e70 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -38,7 +38,7 @@ entry:
 ; X86-64: movq $0
   %tmp1 = alloca [25 x i8]
   %tmp2 = bitcast [25 x i8]* %tmp1 to i8*
-  call void @llvm.memcpy.i32( i8* %tmp2, i8* getelementptr ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1 ) nounwind 
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1, i1 false)
   unreachable
 }
 
@@ -72,7 +72,7 @@ entry:
 ; X86-64: movaps %xmm0, (%rdi)
   %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
   %tmp3 = bitcast %struct.s0* %b to i8*           ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
   ret void
 }
 
@@ -115,7 +115,7 @@ entry:
 ; X86-64: movq %rax, (%rdi)
   %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
   %tmp3 = bitcast %struct.s0* %b to i8*           ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8, i1 false)
   ret void
 }
 
@@ -160,8 +160,8 @@ entry:
 ; X86-64: movl $2021161080
   %tmp1 = alloca [30 x i8]
   %tmp2 = bitcast [30 x i8]* %tmp1 to i8*
-  call void @llvm.memcpy.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1, i1 false)
   unreachable
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memmove-4.ll b/test/CodeGen/X86/memmove-4.ll
deleted file mode 100644
index 027db1f..0000000
--- a/test/CodeGen/X86/memmove-4.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s | not grep call
-
-target triple = "i686-pc-linux-gnu"
-
-define void @a(i8* %a, i8* %b) nounwind {
-        %tmp2 = bitcast i8* %a to i8*
-        %tmp3 = bitcast i8* %b to i8*
-        tail call void @llvm.memmove.i32( i8* %tmp2, i8* %tmp3, i32 12, i32 4 )
-        ret void
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 993583b..b2bd72b 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -6,7 +6,7 @@ define fastcc void @t1() nounwind {
 entry:
 ; CHECK: t1:
 ; CHECK: calll _memset
-  call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
+  call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i32 1, i1 false)
   unreachable
 }
 
@@ -14,7 +14,7 @@ define fastcc void @t2(i8 signext %c) nounwind {
 entry:
 ; CHECK: t2:
 ; CHECK: calll _memset
-  call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
+  call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i32 1, i1 false)
   unreachable
 }
 
diff --git a/test/CodeGen/X86/memset-3.ll b/test/CodeGen/X86/memset-3.ll
index 9b20ad5..29febfa 100644
--- a/test/CodeGen/X86/memset-3.ll
+++ b/test/CodeGen/X86/memset-3.ll
@@ -5,8 +5,8 @@ define void @t() nounwind ssp {
 entry:
   %buf = alloca [512 x i8], align 1
   %ptr = getelementptr inbounds [512 x i8]* %buf, i32 0, i32 0
-  call void @llvm.memset.i32(i8* %ptr, i8 undef, i32 512, i32 1)
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 undef, i32 512, i32 1, i1 false)
   unreachable
 }
 
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index cf7464d..72b3e0f 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -8,11 +8,11 @@ entry:
 	%up_mvd = alloca [8 x %struct.x]		; <[8 x %struct.x]*> [#uses=2]
 	%up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0		; <%struct.x*> [#uses=1]
 	%tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i64( i8* %tmp110117, i8 0, i64 32, i32 8 )
+	call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false)
 	call void @foo( %struct.x* %up_mvd116 ) nounwind 
 	ret void
 }
 
 declare void @foo(%struct.x*)
 
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind 
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index 3f069b4..e20fce1 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -4,9 +4,8 @@
 
 define void @bork() nounwind {
 entry:
-        call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 4 )
-        ret void
+  call void @llvm.memset.p0i8.i64(i8* null, i8 0, i64 80, i32 4, i1 false)
+  ret void
 }
 
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
-
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/optimize-max-0.ll b/test/CodeGen/X86/optimize-max-0.ll
index 162c7a5..981a16a 100644
--- a/test/CodeGen/X86/optimize-max-0.ll
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -8,454 +8,454 @@ target triple = "i386-apple-darwin9"
 
 define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 entry:
-	%0 = mul i32 %x, %w		; <i32> [#uses=2]
-	%1 = mul i32 %x, %w		; <i32> [#uses=1]
-	%2 = sdiv i32 %1, 4		; <i32> [#uses=1]
-	%.sum2 = add i32 %2, %0		; <i32> [#uses=2]
-	%cond = icmp eq i32 %d, 1		; <i1> [#uses=1]
-	br i1 %cond, label %bb29, label %bb10.preheader
-
-bb10.preheader:		; preds = %entry
-	%3 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %3, label %bb.nph9, label %bb18.loopexit
-
-bb.nph7:		; preds = %bb7.preheader
-	%4 = mul i32 %y.08, %w		; <i32> [#uses=1]
-	%5 = mul i32 %y.08, %s		; <i32> [#uses=1]
-	%6 = add i32 %5, 1		; <i32> [#uses=1]
-	%tmp8 = icmp sgt i32 1, %w		; <i1> [#uses=1]
-	%smax9 = select i1 %tmp8, i32 1, i32 %w		; <i32> [#uses=1]
-	br label %bb6
-
-bb6:		; preds = %bb7, %bb.nph7
-	%x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]		; <i32> [#uses=3]
-	%7 = add i32 %x.06, %4		; <i32> [#uses=1]
-	%8 = shl i32 %x.06, 1		; <i32> [#uses=1]
-	%9 = add i32 %6, %8		; <i32> [#uses=1]
-	%10 = getelementptr i8* %r, i32 %9		; <i8*> [#uses=1]
-	%11 = load i8* %10, align 1		; <i8> [#uses=1]
-	%12 = getelementptr i8* %j, i32 %7		; <i8*> [#uses=1]
-	store i8 %11, i8* %12, align 1
-	br label %bb7
-
-bb7:		; preds = %bb6
-	%indvar.next7 = add i32 %x.06, 1		; <i32> [#uses=2]
-	%exitcond10 = icmp ne i32 %indvar.next7, %smax9		; <i1> [#uses=1]
-	br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
-
-bb7.bb9_crit_edge:		; preds = %bb7
-	br label %bb9
-
-bb9:		; preds = %bb7.preheader, %bb7.bb9_crit_edge
-	br label %bb10
-
-bb10:		; preds = %bb9
-	%indvar.next11 = add i32 %y.08, 1		; <i32> [#uses=2]
-	%exitcond12 = icmp ne i32 %indvar.next11, %x		; <i1> [#uses=1]
-	br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
-
-bb10.bb18.loopexit_crit_edge:		; preds = %bb10
-	br label %bb10.bb18.loopexit_crit_edge.split
-
-bb10.bb18.loopexit_crit_edge.split:		; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
-	br label %bb18.loopexit
-
-bb.nph9:		; preds = %bb10.preheader
-	%13 = icmp sgt i32 %w, 0		; <i1> [#uses=1]
-	br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
-
-bb.nph9.split:		; preds = %bb.nph9
-	br label %bb7.preheader
-
-bb7.preheader:		; preds = %bb.nph9.split, %bb10
-	%y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]		; <i32> [#uses=3]
-	br i1 true, label %bb.nph7, label %bb9
-
-bb.nph5:		; preds = %bb18.loopexit
-	%14 = sdiv i32 %w, 2		; <i32> [#uses=1]
-	%15 = icmp slt i32 %w, 2		; <i1> [#uses=1]
-	%16 = sdiv i32 %x, 2		; <i32> [#uses=2]
-	br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
-
-bb.nph5.split:		; preds = %bb.nph5
-	%tmp2 = icmp sgt i32 1, %16		; <i1> [#uses=1]
-	%smax3 = select i1 %tmp2, i32 1, i32 %16		; <i32> [#uses=1]
-	br label %bb13
-
-bb13:		; preds = %bb18, %bb.nph5.split
-	%y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]		; <i32> [#uses=4]
-	%17 = mul i32 %14, %y.14		; <i32> [#uses=2]
-	%18 = shl i32 %y.14, 1		; <i32> [#uses=1]
-	%19 = srem i32 %y.14, 2		; <i32> [#uses=1]
-	%20 = add i32 %19, %18		; <i32> [#uses=1]
-	%21 = mul i32 %20, %s		; <i32> [#uses=2]
-	br i1 true, label %bb.nph3, label %bb17
-
-bb.nph3:		; preds = %bb13
-	%22 = add i32 %17, %0		; <i32> [#uses=1]
-	%23 = add i32 %17, %.sum2		; <i32> [#uses=1]
-	%24 = sdiv i32 %w, 2		; <i32> [#uses=2]
-	%tmp = icmp sgt i32 1, %24		; <i1> [#uses=1]
-	%smax = select i1 %tmp, i32 1, i32 %24		; <i32> [#uses=1]
-	br label %bb14
-
-bb14:		; preds = %bb15, %bb.nph3
-	%x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]		; <i32> [#uses=5]
-	%25 = shl i32 %x.12, 2		; <i32> [#uses=1]
-	%26 = add i32 %25, %21		; <i32> [#uses=1]
-	%27 = getelementptr i8* %r, i32 %26		; <i8*> [#uses=1]
-	%28 = load i8* %27, align 1		; <i8> [#uses=1]
-	%.sum = add i32 %22, %x.12		; <i32> [#uses=1]
-	%29 = getelementptr i8* %j, i32 %.sum		; <i8*> [#uses=1]
-	store i8 %28, i8* %29, align 1
-	%30 = shl i32 %x.12, 2		; <i32> [#uses=1]
-	%31 = or i32 %30, 2		; <i32> [#uses=1]
-	%32 = add i32 %31, %21		; <i32> [#uses=1]
-	%33 = getelementptr i8* %r, i32 %32		; <i8*> [#uses=1]
-	%34 = load i8* %33, align 1		; <i8> [#uses=1]
-	%.sum6 = add i32 %23, %x.12		; <i32> [#uses=1]
-	%35 = getelementptr i8* %j, i32 %.sum6		; <i8*> [#uses=1]
-	store i8 %34, i8* %35, align 1
-	br label %bb15
-
-bb15:		; preds = %bb14
-	%indvar.next = add i32 %x.12, 1		; <i32> [#uses=2]
-	%exitcond = icmp ne i32 %indvar.next, %smax		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
-
-bb15.bb17_crit_edge:		; preds = %bb15
-	br label %bb17
-
-bb17:		; preds = %bb15.bb17_crit_edge, %bb13
-	br label %bb18
-
-bb18.loopexit:		; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
-	%36 = icmp slt i32 %x, 2		; <i1> [#uses=1]
-	br i1 %36, label %bb20, label %bb.nph5
-
-bb18:		; preds = %bb17
-	%indvar.next1 = add i32 %y.14, 1		; <i32> [#uses=2]
-	%exitcond4 = icmp ne i32 %indvar.next1, %smax3		; <i1> [#uses=1]
-	br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
-
-bb18.bb20_crit_edge:		; preds = %bb18
-	br label %bb18.bb20_crit_edge.split
-
-bb18.bb20_crit_edge.split:		; preds = %bb18.bb20_crit_edge, %bb.nph5
-	br label %bb20
-
-bb20:		; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
-	switch i32 %d, label %return [
-		i32 3, label %bb22
-		i32 1, label %bb29
-	]
-
-bb22:		; preds = %bb20
-	%37 = mul i32 %x, %w		; <i32> [#uses=1]
-	%38 = sdiv i32 %37, 4		; <i32> [#uses=1]
-	%.sum3 = add i32 %38, %.sum2		; <i32> [#uses=2]
-	%39 = add i32 %x, 15		; <i32> [#uses=1]
-	%40 = and i32 %39, -16		; <i32> [#uses=1]
-	%41 = add i32 %w, 15		; <i32> [#uses=1]
-	%42 = and i32 %41, -16		; <i32> [#uses=1]
-	%43 = mul i32 %40, %s		; <i32> [#uses=1]
-	%44 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %44, label %bb.nph, label %bb26
-
-bb.nph:		; preds = %bb22
-	br label %bb23
-
-bb23:		; preds = %bb24, %bb.nph
-	%y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]		; <i32> [#uses=3]
-	%45 = mul i32 %y.21, %42		; <i32> [#uses=1]
-	%.sum1 = add i32 %45, %43		; <i32> [#uses=1]
-	%46 = getelementptr i8* %r, i32 %.sum1		; <i8*> [#uses=1]
-	%47 = mul i32 %y.21, %w		; <i32> [#uses=1]
-	%.sum5 = add i32 %47, %.sum3		; <i32> [#uses=1]
-	%48 = getelementptr i8* %j, i32 %.sum5		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
-	br label %bb24
-
-bb24:		; preds = %bb23
-	%indvar.next5 = add i32 %y.21, 1		; <i32> [#uses=2]
-	%exitcond6 = icmp ne i32 %indvar.next5, %x		; <i1> [#uses=1]
-	br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
-
-bb24.bb26_crit_edge:		; preds = %bb24
-	br label %bb26
-
-bb26:		; preds = %bb24.bb26_crit_edge, %bb22
-	%49 = mul i32 %x, %w		; <i32> [#uses=1]
-	%.sum4 = add i32 %.sum3, %49		; <i32> [#uses=1]
-	%50 = getelementptr i8* %j, i32 %.sum4		; <i8*> [#uses=1]
-	%51 = mul i32 %x, %w		; <i32> [#uses=1]
-	%52 = sdiv i32 %51, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
-	ret void
-
-bb29:		; preds = %bb20, %entry
-	%53 = add i32 %w, 15		; <i32> [#uses=1]
-	%54 = and i32 %53, -16		; <i32> [#uses=1]
-	%55 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %55, label %bb.nph11, label %bb33
-
-bb.nph11:		; preds = %bb29
-	br label %bb30
-
-bb30:		; preds = %bb31, %bb.nph11
-	%y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]		; <i32> [#uses=3]
-	%56 = mul i32 %y.310, %54		; <i32> [#uses=1]
-	%57 = getelementptr i8* %r, i32 %56		; <i8*> [#uses=1]
-	%58 = mul i32 %y.310, %w		; <i32> [#uses=1]
-	%59 = getelementptr i8* %j, i32 %58		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
-	br label %bb31
-
-bb31:		; preds = %bb30
-	%indvar.next13 = add i32 %y.310, 1		; <i32> [#uses=2]
-	%exitcond14 = icmp ne i32 %indvar.next13, %x		; <i1> [#uses=1]
-	br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
-
-bb31.bb33_crit_edge:		; preds = %bb31
-	br label %bb33
-
-bb33:		; preds = %bb31.bb33_crit_edge, %bb29
-	%60 = mul i32 %x, %w		; <i32> [#uses=1]
-	%61 = getelementptr i8* %j, i32 %60		; <i8*> [#uses=1]
-	%62 = mul i32 %x, %w		; <i32> [#uses=1]
-	%63 = sdiv i32 %62, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
-	ret void
-
-return:		; preds = %bb20
-	ret void
+  %0 = mul i32 %x, %w
+  %1 = mul i32 %x, %w
+  %2 = sdiv i32 %1, 4
+  %.sum2 = add i32 %2, %0
+  %cond = icmp eq i32 %d, 1
+  br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader:                                   ; preds = %entry
+  %3 = icmp sgt i32 %x, 0
+  br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7:                                          ; preds = %bb7.preheader
+  %4 = mul i32 %y.08, %w
+  %5 = mul i32 %y.08, %s
+  %6 = add i32 %5, 1
+  %tmp8 = icmp sgt i32 1, %w
+  %smax9 = select i1 %tmp8, i32 1, i32 %w
+  br label %bb6
+
+bb6:                                              ; preds = %bb7, %bb.nph7
+  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
+  %7 = add i32 %x.06, %4
+  %8 = shl i32 %x.06, 1
+  %9 = add i32 %6, %8
+  %10 = getelementptr i8* %r, i32 %9
+  %11 = load i8* %10, align 1
+  %12 = getelementptr i8* %j, i32 %7
+  store i8 %11, i8* %12, align 1
+  br label %bb7
+
+bb7:                                              ; preds = %bb6
+  %indvar.next7 = add i32 %x.06, 1
+  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
+  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge:                                ; preds = %bb7
+  br label %bb9
+
+bb9:                                              ; preds = %bb7.preheader, %bb7.bb9_crit_edge
+  br label %bb10
+
+bb10:                                             ; preds = %bb9
+  %indvar.next11 = add i32 %y.08, 1
+  %exitcond12 = icmp ne i32 %indvar.next11, %x
+  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge:                     ; preds = %bb10
+  br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split:               ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+  br label %bb18.loopexit
+
+bb.nph9:                                          ; preds = %bb10.preheader
+  %13 = icmp sgt i32 %w, 0
+  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split:                                    ; preds = %bb.nph9
+  br label %bb7.preheader
+
+bb7.preheader:                                    ; preds = %bb.nph9.split, %bb10
+  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
+  br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5:                                          ; preds = %bb18.loopexit
+  %14 = sdiv i32 %w, 2
+  %15 = icmp slt i32 %w, 2
+  %16 = sdiv i32 %x, 2
+  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split:                                    ; preds = %bb.nph5
+  %tmp2 = icmp sgt i32 1, %16
+  %smax3 = select i1 %tmp2, i32 1, i32 %16
+  br label %bb13
+
+bb13:                                             ; preds = %bb18, %bb.nph5.split
+  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
+  %17 = mul i32 %14, %y.14
+  %18 = shl i32 %y.14, 1
+  %19 = srem i32 %y.14, 2
+  %20 = add i32 %19, %18
+  %21 = mul i32 %20, %s
+  br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3:                                          ; preds = %bb13
+  %22 = add i32 %17, %0
+  %23 = add i32 %17, %.sum2
+  %24 = sdiv i32 %w, 2
+  %tmp = icmp sgt i32 1, %24
+  %smax = select i1 %tmp, i32 1, i32 %24
+  br label %bb14
+
+bb14:                                             ; preds = %bb15, %bb.nph3
+  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
+  %25 = shl i32 %x.12, 2
+  %26 = add i32 %25, %21
+  %27 = getelementptr i8* %r, i32 %26
+  %28 = load i8* %27, align 1
+  %.sum = add i32 %22, %x.12
+  %29 = getelementptr i8* %j, i32 %.sum
+  store i8 %28, i8* %29, align 1
+  %30 = shl i32 %x.12, 2
+  %31 = or i32 %30, 2
+  %32 = add i32 %31, %21
+  %33 = getelementptr i8* %r, i32 %32
+  %34 = load i8* %33, align 1
+  %.sum6 = add i32 %23, %x.12
+  %35 = getelementptr i8* %j, i32 %.sum6
+  store i8 %34, i8* %35, align 1
+  br label %bb15
+
+bb15:                                             ; preds = %bb14
+  %indvar.next = add i32 %x.12, 1
+  %exitcond = icmp ne i32 %indvar.next, %smax
+  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge:                              ; preds = %bb15
+  br label %bb17
+
+bb17:                                             ; preds = %bb15.bb17_crit_edge, %bb13
+  br label %bb18
+
+bb18.loopexit:                                    ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+  %36 = icmp slt i32 %x, 2
+  br i1 %36, label %bb20, label %bb.nph5
+
+bb18:                                             ; preds = %bb17
+  %indvar.next1 = add i32 %y.14, 1
+  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
+  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge:                              ; preds = %bb18
+  br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split:                        ; preds = %bb18.bb20_crit_edge, %bb.nph5
+  br label %bb20
+
+bb20:                                             ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+  switch i32 %d, label %return [
+    i32 3, label %bb22
+    i32 1, label %bb29
+  ]
+
+bb22:                                             ; preds = %bb20
+  %37 = mul i32 %x, %w
+  %38 = sdiv i32 %37, 4
+  %.sum3 = add i32 %38, %.sum2
+  %39 = add i32 %x, 15
+  %40 = and i32 %39, -16
+  %41 = add i32 %w, 15
+  %42 = and i32 %41, -16
+  %43 = mul i32 %40, %s
+  %44 = icmp sgt i32 %x, 0
+  br i1 %44, label %bb.nph, label %bb26
+
+bb.nph:                                           ; preds = %bb22
+  br label %bb23
+
+bb23:                                             ; preds = %bb24, %bb.nph
+  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
+  %45 = mul i32 %y.21, %42
+  %.sum1 = add i32 %45, %43
+  %46 = getelementptr i8* %r, i32 %.sum1
+  %47 = mul i32 %y.21, %w
+  %.sum5 = add i32 %47, %.sum3
+  %48 = getelementptr i8* %j, i32 %.sum5
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false)
+  br label %bb24
+
+bb24:                                             ; preds = %bb23
+  %indvar.next5 = add i32 %y.21, 1
+  %exitcond6 = icmp ne i32 %indvar.next5, %x
+  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge:                              ; preds = %bb24
+  br label %bb26
+
+bb26:                                             ; preds = %bb24.bb26_crit_edge, %bb22
+  %49 = mul i32 %x, %w
+  %.sum4 = add i32 %.sum3, %49
+  %50 = getelementptr i8* %j, i32 %.sum4
+  %51 = mul i32 %x, %w
+  %52 = sdiv i32 %51, 2
+  tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false)
+  ret void
+
+bb29:                                             ; preds = %bb20, %entry
+  %53 = add i32 %w, 15
+  %54 = and i32 %53, -16
+  %55 = icmp sgt i32 %x, 0
+  br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11:                                         ; preds = %bb29
+  br label %bb30
+
+bb30:                                             ; preds = %bb31, %bb.nph11
+  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
+  %56 = mul i32 %y.310, %54
+  %57 = getelementptr i8* %r, i32 %56
+  %58 = mul i32 %y.310, %w
+  %59 = getelementptr i8* %j, i32 %58
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false)
+  br label %bb31
+
+bb31:                                             ; preds = %bb30
+  %indvar.next13 = add i32 %y.310, 1
+  %exitcond14 = icmp ne i32 %indvar.next13, %x
+  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge:                              ; preds = %bb31
+  br label %bb33
+
+bb33:                                             ; preds = %bb31.bb33_crit_edge, %bb29
+  %60 = mul i32 %x, %w
+  %61 = getelementptr i8* %j, i32 %60
+  %62 = mul i32 %x, %w
+  %63 = sdiv i32 %62, 2
+  tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false)
+  ret void
+
+return:                                           ; preds = %bb20
+  ret void
 }
 
 define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 entry:
-	%0 = mul i32 %x, %w		; <i32> [#uses=2]
-	%1 = mul i32 %x, %w		; <i32> [#uses=1]
-	%2 = udiv i32 %1, 4		; <i32> [#uses=1]
-	%.sum2 = add i32 %2, %0		; <i32> [#uses=2]
-	%cond = icmp eq i32 %d, 1		; <i1> [#uses=1]
-	br i1 %cond, label %bb29, label %bb10.preheader
-
-bb10.preheader:		; preds = %entry
-	%3 = icmp ne i32 %x, 0		; <i1> [#uses=1]
-	br i1 %3, label %bb.nph9, label %bb18.loopexit
-
-bb.nph7:		; preds = %bb7.preheader
-	%4 = mul i32 %y.08, %w		; <i32> [#uses=1]
-	%5 = mul i32 %y.08, %s		; <i32> [#uses=1]
-	%6 = add i32 %5, 1		; <i32> [#uses=1]
-	%tmp8 = icmp ugt i32 1, %w		; <i1> [#uses=1]
-	%smax9 = select i1 %tmp8, i32 1, i32 %w		; <i32> [#uses=1]
-	br label %bb6
-
-bb6:		; preds = %bb7, %bb.nph7
-	%x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]		; <i32> [#uses=3]
-	%7 = add i32 %x.06, %4		; <i32> [#uses=1]
-	%8 = shl i32 %x.06, 1		; <i32> [#uses=1]
-	%9 = add i32 %6, %8		; <i32> [#uses=1]
-	%10 = getelementptr i8* %r, i32 %9		; <i8*> [#uses=1]
-	%11 = load i8* %10, align 1		; <i8> [#uses=1]
-	%12 = getelementptr i8* %j, i32 %7		; <i8*> [#uses=1]
-	store i8 %11, i8* %12, align 1
-	br label %bb7
-
-bb7:		; preds = %bb6
-	%indvar.next7 = add i32 %x.06, 1		; <i32> [#uses=2]
-	%exitcond10 = icmp ne i32 %indvar.next7, %smax9		; <i1> [#uses=1]
-	br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
-
-bb7.bb9_crit_edge:		; preds = %bb7
-	br label %bb9
-
-bb9:		; preds = %bb7.preheader, %bb7.bb9_crit_edge
-	br label %bb10
-
-bb10:		; preds = %bb9
-	%indvar.next11 = add i32 %y.08, 1		; <i32> [#uses=2]
-	%exitcond12 = icmp ne i32 %indvar.next11, %x		; <i1> [#uses=1]
-	br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
-
-bb10.bb18.loopexit_crit_edge:		; preds = %bb10
-	br label %bb10.bb18.loopexit_crit_edge.split
-
-bb10.bb18.loopexit_crit_edge.split:		; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
-	br label %bb18.loopexit
-
-bb.nph9:		; preds = %bb10.preheader
-	%13 = icmp ugt i32 %w, 0		; <i1> [#uses=1]
-	br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
-
-bb.nph9.split:		; preds = %bb.nph9
-	br label %bb7.preheader
-
-bb7.preheader:		; preds = %bb.nph9.split, %bb10
-	%y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]		; <i32> [#uses=3]
-	br i1 true, label %bb.nph7, label %bb9
-
-bb.nph5:		; preds = %bb18.loopexit
-	%14 = udiv i32 %w, 2		; <i32> [#uses=1]
-	%15 = icmp ult i32 %w, 2		; <i1> [#uses=1]
-	%16 = udiv i32 %x, 2		; <i32> [#uses=2]
-	br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
-
-bb.nph5.split:		; preds = %bb.nph5
-	%tmp2 = icmp ugt i32 1, %16		; <i1> [#uses=1]
-	%smax3 = select i1 %tmp2, i32 1, i32 %16		; <i32> [#uses=1]
-	br label %bb13
-
-bb13:		; preds = %bb18, %bb.nph5.split
-	%y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]		; <i32> [#uses=4]
-	%17 = mul i32 %14, %y.14		; <i32> [#uses=2]
-	%18 = shl i32 %y.14, 1		; <i32> [#uses=1]
-	%19 = urem i32 %y.14, 2		; <i32> [#uses=1]
-	%20 = add i32 %19, %18		; <i32> [#uses=1]
-	%21 = mul i32 %20, %s		; <i32> [#uses=2]
-	br i1 true, label %bb.nph3, label %bb17
-
-bb.nph3:		; preds = %bb13
-	%22 = add i32 %17, %0		; <i32> [#uses=1]
-	%23 = add i32 %17, %.sum2		; <i32> [#uses=1]
-	%24 = udiv i32 %w, 2		; <i32> [#uses=2]
-	%tmp = icmp ugt i32 1, %24		; <i1> [#uses=1]
-	%smax = select i1 %tmp, i32 1, i32 %24		; <i32> [#uses=1]
-	br label %bb14
-
-bb14:		; preds = %bb15, %bb.nph3
-	%x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]		; <i32> [#uses=5]
-	%25 = shl i32 %x.12, 2		; <i32> [#uses=1]
-	%26 = add i32 %25, %21		; <i32> [#uses=1]
-	%27 = getelementptr i8* %r, i32 %26		; <i8*> [#uses=1]
-	%28 = load i8* %27, align 1		; <i8> [#uses=1]
-	%.sum = add i32 %22, %x.12		; <i32> [#uses=1]
-	%29 = getelementptr i8* %j, i32 %.sum		; <i8*> [#uses=1]
-	store i8 %28, i8* %29, align 1
-	%30 = shl i32 %x.12, 2		; <i32> [#uses=1]
-	%31 = or i32 %30, 2		; <i32> [#uses=1]
-	%32 = add i32 %31, %21		; <i32> [#uses=1]
-	%33 = getelementptr i8* %r, i32 %32		; <i8*> [#uses=1]
-	%34 = load i8* %33, align 1		; <i8> [#uses=1]
-	%.sum6 = add i32 %23, %x.12		; <i32> [#uses=1]
-	%35 = getelementptr i8* %j, i32 %.sum6		; <i8*> [#uses=1]
-	store i8 %34, i8* %35, align 1
-	br label %bb15
-
-bb15:		; preds = %bb14
-	%indvar.next = add i32 %x.12, 1		; <i32> [#uses=2]
-	%exitcond = icmp ne i32 %indvar.next, %smax		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
-
-bb15.bb17_crit_edge:		; preds = %bb15
-	br label %bb17
-
-bb17:		; preds = %bb15.bb17_crit_edge, %bb13
-	br label %bb18
-
-bb18.loopexit:		; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
-	%36 = icmp ult i32 %x, 2		; <i1> [#uses=1]
-	br i1 %36, label %bb20, label %bb.nph5
-
-bb18:		; preds = %bb17
-	%indvar.next1 = add i32 %y.14, 1		; <i32> [#uses=2]
-	%exitcond4 = icmp ne i32 %indvar.next1, %smax3		; <i1> [#uses=1]
-	br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
-
-bb18.bb20_crit_edge:		; preds = %bb18
-	br label %bb18.bb20_crit_edge.split
-
-bb18.bb20_crit_edge.split:		; preds = %bb18.bb20_crit_edge, %bb.nph5
-	br label %bb20
-
-bb20:		; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
-	switch i32 %d, label %return [
-		i32 3, label %bb22
-		i32 1, label %bb29
-	]
-
-bb22:		; preds = %bb20
-	%37 = mul i32 %x, %w		; <i32> [#uses=1]
-	%38 = udiv i32 %37, 4		; <i32> [#uses=1]
-	%.sum3 = add i32 %38, %.sum2		; <i32> [#uses=2]
-	%39 = add i32 %x, 15		; <i32> [#uses=1]
-	%40 = and i32 %39, -16		; <i32> [#uses=1]
-	%41 = add i32 %w, 15		; <i32> [#uses=1]
-	%42 = and i32 %41, -16		; <i32> [#uses=1]
-	%43 = mul i32 %40, %s		; <i32> [#uses=1]
-	%44 = icmp ugt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %44, label %bb.nph, label %bb26
-
-bb.nph:		; preds = %bb22
-	br label %bb23
-
-bb23:		; preds = %bb24, %bb.nph
-	%y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]		; <i32> [#uses=3]
-	%45 = mul i32 %y.21, %42		; <i32> [#uses=1]
-	%.sum1 = add i32 %45, %43		; <i32> [#uses=1]
-	%46 = getelementptr i8* %r, i32 %.sum1		; <i8*> [#uses=1]
-	%47 = mul i32 %y.21, %w		; <i32> [#uses=1]
-	%.sum5 = add i32 %47, %.sum3		; <i32> [#uses=1]
-	%48 = getelementptr i8* %j, i32 %.sum5		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
-	br label %bb24
-
-bb24:		; preds = %bb23
-	%indvar.next5 = add i32 %y.21, 1		; <i32> [#uses=2]
-	%exitcond6 = icmp ne i32 %indvar.next5, %x		; <i1> [#uses=1]
-	br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
-
-bb24.bb26_crit_edge:		; preds = %bb24
-	br label %bb26
-
-bb26:		; preds = %bb24.bb26_crit_edge, %bb22
-	%49 = mul i32 %x, %w		; <i32> [#uses=1]
-	%.sum4 = add i32 %.sum3, %49		; <i32> [#uses=1]
-	%50 = getelementptr i8* %j, i32 %.sum4		; <i8*> [#uses=1]
-	%51 = mul i32 %x, %w		; <i32> [#uses=1]
-	%52 = udiv i32 %51, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
-	ret void
-
-bb29:		; preds = %bb20, %entry
-	%53 = add i32 %w, 15		; <i32> [#uses=1]
-	%54 = and i32 %53, -16		; <i32> [#uses=1]
-	%55 = icmp ugt i32 %x, 0		; <i1> [#uses=1]
-	br i1 %55, label %bb.nph11, label %bb33
-
-bb.nph11:		; preds = %bb29
-	br label %bb30
-
-bb30:		; preds = %bb31, %bb.nph11
-	%y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]		; <i32> [#uses=3]
-	%56 = mul i32 %y.310, %54		; <i32> [#uses=1]
-	%57 = getelementptr i8* %r, i32 %56		; <i8*> [#uses=1]
-	%58 = mul i32 %y.310, %w		; <i32> [#uses=1]
-	%59 = getelementptr i8* %j, i32 %58		; <i8*> [#uses=1]
-	tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
-	br label %bb31
-
-bb31:		; preds = %bb30
-	%indvar.next13 = add i32 %y.310, 1		; <i32> [#uses=2]
-	%exitcond14 = icmp ne i32 %indvar.next13, %x		; <i1> [#uses=1]
-	br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
-
-bb31.bb33_crit_edge:		; preds = %bb31
-	br label %bb33
-
-bb33:		; preds = %bb31.bb33_crit_edge, %bb29
-	%60 = mul i32 %x, %w		; <i32> [#uses=1]
-	%61 = getelementptr i8* %j, i32 %60		; <i8*> [#uses=1]
-	%62 = mul i32 %x, %w		; <i32> [#uses=1]
-	%63 = udiv i32 %62, 2		; <i32> [#uses=1]
-	tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
-	ret void
-
-return:		; preds = %bb20
-	ret void
+  %0 = mul i32 %x, %w
+  %1 = mul i32 %x, %w
+  %2 = udiv i32 %1, 4
+  %.sum2 = add i32 %2, %0
+  %cond = icmp eq i32 %d, 1
+  br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader:                                   ; preds = %entry
+  %3 = icmp ne i32 %x, 0
+  br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7:                                          ; preds = %bb7.preheader
+  %4 = mul i32 %y.08, %w
+  %5 = mul i32 %y.08, %s
+  %6 = add i32 %5, 1
+  %tmp8 = icmp ugt i32 1, %w
+  %smax9 = select i1 %tmp8, i32 1, i32 %w
+  br label %bb6
+
+bb6:                                              ; preds = %bb7, %bb.nph7
+  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
+  %7 = add i32 %x.06, %4
+  %8 = shl i32 %x.06, 1
+  %9 = add i32 %6, %8
+  %10 = getelementptr i8* %r, i32 %9
+  %11 = load i8* %10, align 1
+  %12 = getelementptr i8* %j, i32 %7
+  store i8 %11, i8* %12, align 1
+  br label %bb7
+
+bb7:                                              ; preds = %bb6
+  %indvar.next7 = add i32 %x.06, 1
+  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
+  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge:                                ; preds = %bb7
+  br label %bb9
+
+bb9:                                              ; preds = %bb7.preheader, %bb7.bb9_crit_edge
+  br label %bb10
+
+bb10:                                             ; preds = %bb9
+  %indvar.next11 = add i32 %y.08, 1
+  %exitcond12 = icmp ne i32 %indvar.next11, %x
+  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge:                     ; preds = %bb10
+  br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split:               ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+  br label %bb18.loopexit
+
+bb.nph9:                                          ; preds = %bb10.preheader
+  %13 = icmp ugt i32 %w, 0
+  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split:                                    ; preds = %bb.nph9
+  br label %bb7.preheader
+
+bb7.preheader:                                    ; preds = %bb.nph9.split, %bb10
+  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
+  br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5:                                          ; preds = %bb18.loopexit
+  %14 = udiv i32 %w, 2
+  %15 = icmp ult i32 %w, 2
+  %16 = udiv i32 %x, 2
+  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split:                                    ; preds = %bb.nph5
+  %tmp2 = icmp ugt i32 1, %16
+  %smax3 = select i1 %tmp2, i32 1, i32 %16
+  br label %bb13
+
+bb13:                                             ; preds = %bb18, %bb.nph5.split
+  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
+  %17 = mul i32 %14, %y.14
+  %18 = shl i32 %y.14, 1
+  %19 = urem i32 %y.14, 2
+  %20 = add i32 %19, %18
+  %21 = mul i32 %20, %s
+  br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3:                                          ; preds = %bb13
+  %22 = add i32 %17, %0
+  %23 = add i32 %17, %.sum2
+  %24 = udiv i32 %w, 2
+  %tmp = icmp ugt i32 1, %24
+  %smax = select i1 %tmp, i32 1, i32 %24
+  br label %bb14
+
+bb14:                                             ; preds = %bb15, %bb.nph3
+  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
+  %25 = shl i32 %x.12, 2
+  %26 = add i32 %25, %21
+  %27 = getelementptr i8* %r, i32 %26
+  %28 = load i8* %27, align 1
+  %.sum = add i32 %22, %x.12
+  %29 = getelementptr i8* %j, i32 %.sum
+  store i8 %28, i8* %29, align 1
+  %30 = shl i32 %x.12, 2
+  %31 = or i32 %30, 2
+  %32 = add i32 %31, %21
+  %33 = getelementptr i8* %r, i32 %32
+  %34 = load i8* %33, align 1
+  %.sum6 = add i32 %23, %x.12
+  %35 = getelementptr i8* %j, i32 %.sum6
+  store i8 %34, i8* %35, align 1
+  br label %bb15
+
+bb15:                                             ; preds = %bb14
+  %indvar.next = add i32 %x.12, 1
+  %exitcond = icmp ne i32 %indvar.next, %smax
+  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge:                              ; preds = %bb15
+  br label %bb17
+
+bb17:                                             ; preds = %bb15.bb17_crit_edge, %bb13
+  br label %bb18
+
+bb18.loopexit:                                    ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+  %36 = icmp ult i32 %x, 2
+  br i1 %36, label %bb20, label %bb.nph5
+
+bb18:                                             ; preds = %bb17
+  %indvar.next1 = add i32 %y.14, 1
+  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
+  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge:                              ; preds = %bb18
+  br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split:                        ; preds = %bb18.bb20_crit_edge, %bb.nph5
+  br label %bb20
+
+bb20:                                             ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+  switch i32 %d, label %return [
+    i32 3, label %bb22
+    i32 1, label %bb29
+  ]
+
+bb22:                                             ; preds = %bb20
+  %37 = mul i32 %x, %w
+  %38 = udiv i32 %37, 4
+  %.sum3 = add i32 %38, %.sum2
+  %39 = add i32 %x, 15
+  %40 = and i32 %39, -16
+  %41 = add i32 %w, 15
+  %42 = and i32 %41, -16
+  %43 = mul i32 %40, %s
+  %44 = icmp ugt i32 %x, 0
+  br i1 %44, label %bb.nph, label %bb26
+
+bb.nph:                                           ; preds = %bb22
+  br label %bb23
+
+bb23:                                             ; preds = %bb24, %bb.nph
+  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
+  %45 = mul i32 %y.21, %42
+  %.sum1 = add i32 %45, %43
+  %46 = getelementptr i8* %r, i32 %.sum1
+  %47 = mul i32 %y.21, %w
+  %.sum5 = add i32 %47, %.sum3
+  %48 = getelementptr i8* %j, i32 %.sum5
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false)
+  br label %bb24
+
+bb24:                                             ; preds = %bb23
+  %indvar.next5 = add i32 %y.21, 1
+  %exitcond6 = icmp ne i32 %indvar.next5, %x
+  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge:                              ; preds = %bb24
+  br label %bb26
+
+bb26:                                             ; preds = %bb24.bb26_crit_edge, %bb22
+  %49 = mul i32 %x, %w
+  %.sum4 = add i32 %.sum3, %49
+  %50 = getelementptr i8* %j, i32 %.sum4
+  %51 = mul i32 %x, %w
+  %52 = udiv i32 %51, 2
+  tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false)
+  ret void
+
+bb29:                                             ; preds = %bb20, %entry
+  %53 = add i32 %w, 15
+  %54 = and i32 %53, -16
+  %55 = icmp ugt i32 %x, 0
+  br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11:                                         ; preds = %bb29
+  br label %bb30
+
+bb30:                                             ; preds = %bb31, %bb.nph11
+  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
+  %56 = mul i32 %y.310, %54
+  %57 = getelementptr i8* %r, i32 %56
+  %58 = mul i32 %y.310, %w
+  %59 = getelementptr i8* %j, i32 %58
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false)
+  br label %bb31
+
+bb31:                                             ; preds = %bb30
+  %indvar.next13 = add i32 %y.310, 1
+  %exitcond14 = icmp ne i32 %indvar.next13, %x
+  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge:                              ; preds = %bb31
+  br label %bb33
+
+bb33:                                             ; preds = %bb31.bb33_crit_edge, %bb29
+  %60 = mul i32 %x, %w
+  %61 = getelementptr i8* %j, i32 %60
+  %62 = mul i32 %x, %w
+  %63 = udiv i32 %62, 2
+  tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false)
+  ret void
+
+return:                                           ; preds = %bb20
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
index e952a9b..d3d8e3f 100644
--- a/test/CodeGen/X86/personality.ll
+++ b/test/CodeGen/X86/personality.ll
@@ -4,40 +4,41 @@
 
 define void @_Z1fv() {
 entry:
-	invoke void @_Z1gv( )
-			to label %return unwind label %unwind
+  invoke void @_Z1gv()
+          to label %return unwind label %unwind
 
-unwind:		; preds = %entry
-	br i1 false, label %eh_then, label %cleanup20
+unwind:                                           ; preds = %entry
+  br i1 false, label %eh_then, label %cleanup20
 
-eh_then:		; preds = %unwind
-	invoke void @__cxa_end_catch( )
-			to label %return unwind label %unwind10
+eh_then:                                          ; preds = %unwind
+  invoke void @__cxa_end_catch()
+          to label %return unwind label %unwind10
 
-unwind10:		; preds = %eh_then
-	%eh_select13 = tail call i64 (i8*, i8*, ...)* @llvm.eh.selector.i64( i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1 )		; <i32> [#uses=2]
-	%tmp18 = icmp slt i64 %eh_select13, 0		; <i1> [#uses=1]
-	br i1 %tmp18, label %filter, label %cleanup20
+unwind10:                                         ; preds = %eh_then
+  %upgraded.eh_select13 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1)
+  %upgraded.eh_select131 = sext i32 %upgraded.eh_select13 to i64
+  %tmp18 = icmp slt i64 %upgraded.eh_select131, 0
+  br i1 %tmp18, label %filter, label %cleanup20
 
-filter:		; preds = %unwind10
-	unreachable
+filter:                                           ; preds = %unwind10
+  unreachable
 
-cleanup20:		; preds = %unwind10, %unwind
-	%eh_selector.0 = phi i64 [ 0, %unwind ], [ %eh_select13, %unwind10 ]		; <i32> [#uses=0]
-	ret void
+cleanup20:                                        ; preds = %unwind10, %unwind
+  %eh_selector.0 = phi i64 [ 0, %unwind ], [ %upgraded.eh_select131, %unwind10 ]
+  ret void
 
-return:		; preds = %eh_then, %entry
-	ret void
+return:                                           ; preds = %eh_then, %entry
+  ret void
 }
 
 declare void @_Z1gv()
 
-declare i64 @llvm.eh.selector.i64(i8*, i8*, ...)
-
 declare void @__gxx_personality_v0()
 
 declare void @__cxa_end_catch()
 
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
 ; X64:      zPLR
 ; X64:      .byte 155
 ; X64-NEXT: .long	___gxx_personality_v0@GOTPCREL+4
diff --git a/test/CodeGen/X86/pre-split2.ll b/test/CodeGen/X86/pre-split2.ll
deleted file mode 100644
index 670737b..0000000
--- a/test/CodeGen/X86/pre-split2.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats -regalloc=linearscan |& \
-; RUN:   grep {pre-alloc-split} | count 2
-
-define i32 @t(i32 %arg) {
-entry:
-	br label %bb6
-
-.noexc6:		; preds = %bb6
-	%0 = and i32 %2, -8		; <i32> [#uses=1]
-	tail call void @llvm.memmove.i32(i8* %3, i8* null, i32 %0, i32 1) nounwind
-	store double %1, double* null, align 8
-	br label %bb6
-
-bb6:		; preds = %.noexc6, %entry
-	%1 = uitofp i32 %arg to double		; <double> [#uses=1]
-	%2 = sub i32 0, 0		; <i32> [#uses=1]
-	%3 = invoke i8* @_Znwm(i32 0)
-			to label %.noexc6 unwind label %lpad32		; <i8*> [#uses=1]
-
-lpad32:		; preds = %bb6
-	unreachable
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
-
-declare i8* @_Znwm(i32)
diff --git a/test/CodeGen/X86/pre-split3.ll b/test/CodeGen/X86/pre-split3.ll
deleted file mode 100644
index 0c49a91..0000000
--- a/test/CodeGen/X86/pre-split3.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
-; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
-
-define i32 @t(i32 %arg) {
-entry:
-	br label %bb6
-
-.noexc6:		; preds = %bb6
-	%0 = and i32 %2, -8		; <i32> [#uses=1]
-	tail call void @llvm.memmove.i32(i8* %3, i8* null, i32 %0, i32 1) nounwind
-	store double %1, double* null, align 8
-	br label %bb6
-
-bb6:		; preds = %.noexc6, %entry
-	%1 = uitofp i32 %arg to double		; <double> [#uses=1]
-	%2 = sub i32 0, 0		; <i32> [#uses=1]
-	%3 = invoke i8* @_Znwm(i32 0)
-			to label %.noexc6 unwind label %lpad32		; <i8*> [#uses=1]
-
-lpad32:		; preds = %bb6
-	unreachable
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
-
-declare i8* @_Znwm(i32)
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index 9f70489..d8fffbe 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -10,9 +10,17 @@ entry:
   %String2Loc = alloca [31 x i8], align 1
   br label %bb
 
-bb:
+bb:                                               ; preds = %bb, %entry
   %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
-  call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1, i1 false)
+  br label %bb
+
+return:                                           ; No predecessors!
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
 ; I386: calll {{_?}}memcpy
 
 ; CORE2: movabsq
@@ -20,13 +28,6 @@ bb:
 ; CORE2: movabsq
 
 ; COREI7: movups _.str3
-  br label %bb
-
-return:
-  ret void
-}
-
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 
 ; CORE2: .section
 ; CORE2: .align  3
diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
index 4817db2..1e86d75 100644
--- a/test/CodeGen/X86/variable-sized-darwin-bzero.ll
+++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
 
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-
 define void @foo(i8* %p, i64 %n) {
-  call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4)
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i32 4, i1 false)
   ret void
 }
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/DeadStoreElimination/crash.ll b/test/Transforms/DeadStoreElimination/crash.ll
index bb279cd..148695f 100644
--- a/test/Transforms/DeadStoreElimination/crash.ll
+++ b/test/Transforms/DeadStoreElimination/crash.ll
@@ -36,11 +36,11 @@ bb14:                                             ; preds = %bb4
   %6 = getelementptr inbounds i16* %2, i64 undef  ; <i16*> [#uses=1]
   store i16 undef, i16* %6, align 2
   %7 = getelementptr inbounds i8* %5, i64 undef   ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i64(i8* %7, i8* undef, i64 undef, i32 1) nounwind
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i32 1, i1 false)
   unreachable
 }
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 
 ; rdar://7635088
diff --git a/test/Transforms/DeadStoreElimination/lifetime.ll b/test/Transforms/DeadStoreElimination/lifetime.ll
index 2b5cc5a..6785653 100644
--- a/test/Transforms/DeadStoreElimination/lifetime.ll
+++ b/test/Transforms/DeadStoreElimination/lifetime.ll
@@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
 
 define void @test1() {
 ; CHECK: @test1
@@ -14,7 +14,7 @@ define void @test1() {
   call void @llvm.lifetime.end(i64 1, i8* %A)
 ; CHECK: lifetime.end
 
-  call void @llvm.memset.i8(i8* %A, i8 0, i8 -1, i32 0)
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
 ; CHECK-NOT: memset
 
   ret void
diff --git a/test/Transforms/DeadStoreElimination/memintrinsics.ll b/test/Transforms/DeadStoreElimination/memintrinsics.ll
index e31e9fa..d5c5365 100644
--- a/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ b/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -S -dse < %s | FileCheck %s
 
-declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
-declare void @llvm.memmove.i8(i8*, i8*, i8, i32)
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
 
 define void @test1() {
 ; CHECK: @test1
@@ -12,7 +12,7 @@ define void @test1() {
   store i8 0, i8* %A  ;; Written to by memcpy
 ; CHECK-NOT: store
 
-  call void @llvm.memcpy.i8(i8* %A, i8* %B, i8 -1, i32 0)
+  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
@@ -26,7 +26,7 @@ define void @test2() {
   store i8 0, i8* %A  ;; Written to by memmove
 ; CHECK-NOT: store
 
-  call void @llvm.memmove.i8(i8* %A, i8* %B, i8 -1, i32 0)
+  call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
@@ -40,7 +40,7 @@ define void @test3() {
   store i8 0, i8* %A  ;; Written to by memset
 ; CHECK-NOT: store
 
-  call void @llvm.memset.i8(i8* %A, i8 0, i8 -1, i32 0)
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 23576da..5f143fc 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -3,8 +3,6 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
 declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
 
 define void @test1(i32* %Q, i32* %P) {
@@ -65,7 +63,7 @@ define void @test5(i32* %Q) {
 ; alias).
 define void @test6(i32 *%p, i8 *%q) {
   store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memset.i64(i8* %q, i8 42, i64 900, i32 1)
+  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i32 1, i1 false)
   store i32 30, i32* %p, align 4
   ret void
 ; CHECK: @test6
@@ -76,7 +74,7 @@ define void @test6(i32 *%p, i8 *%q) {
 ; alias).
 define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
   store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memcpy.i64(i8* %q, i8* %r, i64 900, i32 1)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i32 1, i1 false)
   store i32 30, i32* %p, align 4
   ret void
 ; CHECK: @test7
@@ -184,8 +182,8 @@ define void @test14(i32* %Q) {
 
 ;; Fully dead overwrite of memcpy.
 define void @test15(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK: @test15
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -194,8 +192,8 @@ define void @test15(i8* %P, i8* %Q) nounwind ssp {
 
 ;; Full overwrite of smaller memcpy.
 define void @test16(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 8, i32 1)
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK: @test16
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -204,8 +202,8 @@ define void @test16(i8* %P, i8* %Q) nounwind ssp {
 
 ;; Overwrite of memset by memcpy.
 define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
-  tail call void @llvm.memset.i64(i8* %P, i8 42, i64 8, i32 1)
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK: @test17
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -228,8 +226,8 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp {
 ; A = B
 ; A = A
 define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
-  tail call void @llvm.memcpy.i64(i8* %P, i8* %R, i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
   ret void
 ; CHECK: @test18
 ; CHECK-NEXT: call void @llvm.memcpy
diff --git a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
deleted file mode 100644
index faac118..0000000
--- a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
-
-%struct.X = type { i32*, i32* }
-
-declare i32 @g(i32*) readnone
-
-define i32 @f() {
-; CHECK: @f() readnone
-	%x = alloca i32		; <i32*> [#uses=2]
-	store i32 0, i32* %x
-	%y = call i32 @g(i32* %x)		; <i32> [#uses=1]
-	ret i32 %y
-}
-
-define i32 @foo() nounwind {
-; CHECK: @foo() nounwind readonly
-entry:
-  %y = alloca %struct.X                           ; <%struct.X*> [#uses=2]
-  %x = alloca %struct.X                           ; <%struct.X*> [#uses=2]
-  %j = alloca i32                                 ; <i32*> [#uses=2]
-  %i = alloca i32                                 ; <i32*> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  store i32 0, i32* %i, align 4
-  store i32 1, i32* %j, align 4
-  %0 = getelementptr inbounds %struct.X* %y, i32 0, i32 0 ; <i32**> [#uses=1]
-  store i32* %i, i32** %0, align 8
-  %1 = getelementptr inbounds %struct.X* %x, i32 0, i32 1 ; <i32**> [#uses=1]
-  store i32* %j, i32** %1, align 8
-  %x1 = bitcast %struct.X* %x to i8*              ; <i8*> [#uses=2]
-  %y2 = bitcast %struct.X* %y to i8*              ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i64(i8* %x1, i8* %y2, i64 8, i32 1)
-  %2 = bitcast i8* %x1 to i32**                   ; <i32**> [#uses=1]
-  %3 = load i32** %2, align 8                     ; <i32*> [#uses=1]
-  %4 = load i32* %3, align 4                      ; <i32> [#uses=1]
-  br label %return
-
-return:                                           ; preds = %entry
-  ret i32 %4
-}
-
-define i32 @t(i32 %a, i32 %b, i32 %c) nounwind {
-; CHECK: @t(i32 %a, i32 %b, i32 %c) nounwind readnone
-entry:
-  %a.addr = alloca i32                            ; <i32*> [#uses=3]
-  %c.addr = alloca i32                            ; <i32*> [#uses=2]
-  store i32 %a, i32* %a.addr
-  store i32 %c, i32* %c.addr
-  %tmp = load i32* %a.addr                        ; <i32> [#uses=1]
-  %tobool = icmp ne i32 %tmp, 0                   ; <i1> [#uses=1]
-  br i1 %tobool, label %if.then, label %if.else
-
-if.then:                                          ; preds = %entry
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ] ; <i32*> [#uses=1]
-  %tmp2 = load i32* %p.0                          ; <i32> [#uses=1]
-  ret i32 %tmp2
-}
-
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
diff --git a/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll b/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
deleted file mode 100644
index 9a75e1a..0000000
--- a/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt < %s -gvn -dse -S | grep {call.*memcpy} | count 1
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin8"
-	%struct.ggFrame3 = type { %struct.ggPoint3, %struct.ggONB3 }
-	%struct.ggHMatrix3 = type { [4 x [4 x double]] }
-	%struct.ggONB3 = type { %struct.ggPoint3, %struct.ggPoint3, %struct.ggPoint3 }
-	%struct.ggPoint3 = type { [3 x double] }
-	%struct.ggQuaternion = type { [4 x double], i32, %struct.ggHMatrix3 }
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind 
-
-define void @_Z10ggCRSplineRK8ggFrame3S1_S1_S1_d(%struct.ggFrame3* noalias sret  %agg.result, %struct.ggFrame3* %f0, %struct.ggFrame3* %f1, %struct.ggFrame3* %f2, %struct.ggFrame3* %f3, double %t) nounwind  {
-entry:
-	%qresult = alloca %struct.ggQuaternion		; <%struct.ggQuaternion*> [#uses=1]
-	%tmp = alloca %struct.ggONB3		; <%struct.ggONB3*> [#uses=2]
-	call void @_ZN12ggQuaternion7getONB3Ev( %struct.ggONB3* noalias sret  %tmp, %struct.ggQuaternion* %qresult ) nounwind 
-	%tmp1.i = getelementptr %struct.ggFrame3* %agg.result, i32 0, i32 1		; <%struct.ggONB3*> [#uses=1]
-	%tmp13.i = bitcast %struct.ggONB3* %tmp1.i to i8*		; <i8*> [#uses=1]
-	%tmp24.i = bitcast %struct.ggONB3* %tmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i64( i8* %tmp13.i, i8* %tmp24.i, i64 72, i32 8 ) nounwind 
-	ret void
-}
-
-declare void @_ZN12ggQuaternion7getONB3Ev(%struct.ggONB3* noalias sret , %struct.ggQuaternion*) nounwind 
diff --git a/test/Transforms/GVN/2008-02-26-MemCpySize.ll b/test/Transforms/GVN/2008-02-26-MemCpySize.ll
deleted file mode 100644
index 6ed8a76..0000000
--- a/test/Transforms/GVN/2008-02-26-MemCpySize.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: opt < %s -gvn -dse -S | grep {call.*memcpy.*cell} | count 2
-; PR2099
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin9"
-	%struct.s = type { [11 x i8], i32 }
-@.str = internal constant [11 x i8] c"0123456789\00"		; <[11 x i8]*> [#uses=1]
-@cell = weak global %struct.s zeroinitializer		; <%struct.s*> [#uses=2]
-
-declare i32 @check(%struct.s* byval  %p) nounwind
-
-declare i32 @strcmp(i8*, i8*) nounwind readonly 
-
-define i32 @main() noreturn nounwind  {
-entry:
-	%p = alloca %struct.s, align 8		; <%struct.s*> [#uses=2]
-	store i32 99, i32* getelementptr (%struct.s* @cell, i32 0, i32 1), align 4
-	call void @llvm.memcpy.i32( i8* getelementptr (%struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0), i32 11, i32 1 )
-	%tmp = getelementptr %struct.s* %p, i32 0, i32 0, i32 0		; <i8*> [#uses=2]
-	call void @llvm.memcpy.i64( i8* %tmp, i8* getelementptr (%struct.s* @cell, i32 0, i32 0, i32 0), i64 16, i32 8 )
-	%tmp1.i = getelementptr %struct.s* %p, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp2.i = load i32* %tmp1.i, align 4		; <i32> [#uses=1]
-	%tmp3.i = icmp eq i32 %tmp2.i, 99		; <i1> [#uses=1]
-	br i1 %tmp3.i, label %bb5.i, label %bb
-
-bb5.i:		; preds = %entry
-	%tmp91.i = call i32 @strcmp( i8* %tmp, i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0) ) nounwind readonly 		; <i32> [#uses=1]
-	%tmp53 = icmp eq i32 %tmp91.i, 0		; <i1> [#uses=1]
-	br i1 %tmp53, label %bb7, label %bb
-
-bb:		; preds = %bb5.i, %entry
-	call void @abort( ) noreturn nounwind 
-	unreachable
-
-bb7:		; preds = %bb5.i
-	call void @exit( i32 0 ) noreturn nounwind 
-	unreachable
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
-
-declare void @abort() noreturn nounwind 
-
-declare void @exit(i32) noreturn nounwind 
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind 
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll
index 1d50205..dba9d81 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/nonescaping-malloc.ll
@@ -20,13 +20,13 @@ target triple = "x86_64-apple-darwin10.0"
 
 define %"struct.llvm::StringMapEntry<void*>"* @_Z3fooRN4llvm9StringMapIPvNS_15MallocAllocatorEEEPKc(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, i8* %P) ssp {
 entry:
-  %tmp = alloca %"struct.llvm::StringRef", align 8 ; <%"struct.llvm::StringRef"*> [#uses=3]
-  %tmp.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 0 ; <i8**> [#uses=1]
+  %tmp = alloca %"struct.llvm::StringRef", align 8
+  %tmp.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 0
   store i8* %P, i8** %tmp.i, align 8
-  %tmp1.i = call i64 @strlen(i8* %P) nounwind readonly ; <i64> [#uses=1]
-  %tmp2.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 1 ; <i64*> [#uses=1]
+  %tmp1.i = call i64 @strlen(i8* %P) nounwind readonly
+  %tmp2.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 1
   store i64 %tmp1.i, i64* %tmp2.i, align 8
-  %tmp1 = call %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, %"struct.llvm::StringRef"* %tmp) ssp ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=1]
+  %tmp1 = call %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, %"struct.llvm::StringRef"* %tmp) ssp
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp1
 }
 
@@ -34,75 +34,75 @@ declare i64 @strlen(i8* nocapture) nounwind readonly
 
 declare noalias i8* @malloc(i64) nounwind
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
 declare i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"*, i64, i64)
 
 define linkonce_odr %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, %"struct.llvm::StringRef"* nocapture %Key) ssp align 2 {
 entry:
-  %elt = bitcast %"struct.llvm::StringRef"* %Key to i64* ; <i64*> [#uses=1]
-  %val = load i64* %elt                           ; <i64> [#uses=3]
-  %tmp = getelementptr inbounds %"struct.llvm::StringRef"* %Key, i64 0, i32 1 ; <i64*> [#uses=1]
-  %val2 = load i64* %tmp                          ; <i64> [#uses=2]
-  %tmp2.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0 ; <%"struct.llvm::StringMapImpl"*> [#uses=1]
-  %tmp3.i = tail call i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"* %tmp2.i, i64 %val, i64 %val2) ; <i32> [#uses=1]
-  %tmp4.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 0 ; <%"struct.llvm::StringMapImpl::ItemBucket"**> [#uses=1]
-  %tmp5.i = load %"struct.llvm::StringMapImpl::ItemBucket"** %tmp4.i, align 8 ; <%"struct.llvm::StringMapImpl::ItemBucket"*> [#uses=1]
-  %tmp6.i = zext i32 %tmp3.i to i64               ; <i64> [#uses=1]
-  %tmp7.i = getelementptr inbounds %"struct.llvm::StringMapImpl::ItemBucket"* %tmp5.i, i64 %tmp6.i, i32 1 ; <%"struct.llvm::StringMapEntryBase"**> [#uses=2]
-  %tmp8.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8 ; <%"struct.llvm::StringMapEntryBase"*> [#uses=3]
-  %tmp9.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, null ; <i1> [#uses=1]
-  %tmp13.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*) ; <i1> [#uses=1]
-  %or.cond.i = or i1 %tmp9.i, %tmp13.i            ; <i1> [#uses=1]
+  %elt = bitcast %"struct.llvm::StringRef"* %Key to i64*
+  %val = load i64* %elt
+  %tmp = getelementptr inbounds %"struct.llvm::StringRef"* %Key, i64 0, i32 1
+  %val2 = load i64* %tmp
+  %tmp2.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0
+  %tmp3.i = tail call i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"* %tmp2.i, i64 %val, i64 %val2)
+  %tmp4.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 0
+  %tmp5.i = load %"struct.llvm::StringMapImpl::ItemBucket"** %tmp4.i, align 8
+  %tmp6.i = zext i32 %tmp3.i to i64
+  %tmp7.i = getelementptr inbounds %"struct.llvm::StringMapImpl::ItemBucket"* %tmp5.i, i64 %tmp6.i, i32 1
+  %tmp8.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
+  %tmp9.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, null
+  %tmp13.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*)
+  %or.cond.i = or i1 %tmp9.i, %tmp13.i
   br i1 %or.cond.i, label %bb4.i, label %bb6.i
 
 bb4.i:                                            ; preds = %entry
-  %tmp41.i = inttoptr i64 %val to i8*             ; <i8*> [#uses=2]
-  %tmp4.i35.i = getelementptr inbounds i8* %tmp41.i, i64 %val2 ; <i8*> [#uses=1]
-  %tmp.i.i = ptrtoint i8* %tmp4.i35.i to i64      ; <i64> [#uses=1]
-  %tmp1.i.i = trunc i64 %tmp.i.i to i32           ; <i32> [#uses=1]
-  %tmp3.i.i = trunc i64 %val to i32               ; <i32> [#uses=1]
-  %tmp4.i.i = sub i32 %tmp1.i.i, %tmp3.i.i        ; <i32> [#uses=3]
-  %tmp5.i.i = add i32 %tmp4.i.i, 17               ; <i32> [#uses=1]
-  %tmp8.i.i = zext i32 %tmp5.i.i to i64           ; <i64> [#uses=1]
-  %tmp.i20.i.i = tail call noalias i8* @malloc(i64 %tmp8.i.i) nounwind ; <i8*> [#uses=7]
-  %tmp10.i.i = bitcast i8* %tmp.i20.i.i to %"struct.llvm::StringMapEntry<void*>"* ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=2]
-  %tmp12.i.i = icmp eq i8* %tmp.i20.i.i, null     ; <i1> [#uses=1]
+  %tmp41.i = inttoptr i64 %val to i8*
+  %tmp4.i35.i = getelementptr inbounds i8* %tmp41.i, i64 %val2
+  %tmp.i.i = ptrtoint i8* %tmp4.i35.i to i64
+  %tmp1.i.i = trunc i64 %tmp.i.i to i32
+  %tmp3.i.i = trunc i64 %val to i32
+  %tmp4.i.i = sub i32 %tmp1.i.i, %tmp3.i.i
+  %tmp5.i.i = add i32 %tmp4.i.i, 17
+  %tmp8.i.i = zext i32 %tmp5.i.i to i64
+  %tmp.i20.i.i = tail call noalias i8* @malloc(i64 %tmp8.i.i) nounwind
+  %tmp10.i.i = bitcast i8* %tmp.i20.i.i to %"struct.llvm::StringMapEntry<void*>"*
+  %tmp12.i.i = icmp eq i8* %tmp.i20.i.i, null
   br i1 %tmp12.i.i, label %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i, label %bb.i.i
 
 bb.i.i:                                           ; preds = %bb4.i
-  %tmp.i.i.i.i = bitcast i8* %tmp.i20.i.i to i32* ; <i32*> [#uses=1]
+  %tmp.i.i.i.i = bitcast i8* %tmp.i20.i.i to i32*
   store i32 %tmp4.i.i, i32* %tmp.i.i.i.i, align 4
-  %tmp1.i19.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8 ; <i8*> [#uses=1]
-  %0 = bitcast i8* %tmp1.i19.i.i to i8**          ; <i8**> [#uses=1]
+  %tmp1.i19.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8
+  %0 = bitcast i8* %tmp1.i19.i.i to i8**
   store i8* null, i8** %0, align 8
   br label %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
 
-_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i: ; preds = %bb4.i, %bb.i.i
-  %tmp.i18.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 16 ; <i8*> [#uses=1]
-  %tmp15.i.i = zext i32 %tmp4.i.i to i64          ; <i64> [#uses=2]
-  tail call void @llvm.memcpy.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i32 1) nounwind
-  %tmp.i18.sum.i.i = add i64 %tmp15.i.i, 16       ; <i64> [#uses=1]
-  %tmp17.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 %tmp.i18.sum.i.i ; <i8*> [#uses=1]
+_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i: ; preds = %bb.i.i, %bb4.i
+  %tmp.i18.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 16
+  %tmp15.i.i = zext i32 %tmp4.i.i to i64
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i32 1, i1 false)
+  %tmp.i18.sum.i.i = add i64 %tmp15.i.i, 16
+  %tmp17.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 %tmp.i18.sum.i.i
   store i8 0, i8* %tmp17.i.i, align 1
-  %tmp.i.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8 ; <i8*> [#uses=1]
-  %1 = bitcast i8* %tmp.i.i.i to i8**             ; <i8**> [#uses=1]
+  %tmp.i.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8
+  %1 = bitcast i8* %tmp.i.i.i to i8**
   store i8* null, i8** %1, align 8
-  %tmp22.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8 ; <%"struct.llvm::StringMapEntryBase"*> [#uses=1]
-  %tmp24.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp22.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*) ; <i1> [#uses=1]
+  %tmp22.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
+  %tmp24.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp22.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*)
   br i1 %tmp24.i, label %bb9.i, label %_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueIS1_EERNS_14StringMapEntryIS1_EENS_9StringRefET_.exit
 
 bb6.i:                                            ; preds = %entry
-  %tmp16.i = bitcast %"struct.llvm::StringMapEntryBase"* %tmp8.i to %"struct.llvm::StringMapEntry<void*>"* ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=1]
+  %tmp16.i = bitcast %"struct.llvm::StringMapEntryBase"* %tmp8.i to %"struct.llvm::StringMapEntry<void*>"*
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp16.i
 
 bb9.i:                                            ; preds = %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
-  %tmp25.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 3 ; <i32*> [#uses=2]
-  %tmp26.i = load i32* %tmp25.i, align 8          ; <i32> [#uses=1]
-  %tmp27.i = add i32 %tmp26.i, -1                 ; <i32> [#uses=1]
+  %tmp25.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 3
+  %tmp26.i = load i32* %tmp25.i, align 8
+  %tmp27.i = add i32 %tmp26.i, -1
   store i32 %tmp27.i, i32* %tmp25.i, align 8
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
 
 _ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueIS1_EERNS_14StringMapEntryIS1_EENS_9StringRefET_.exit: ; preds = %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 28b1fc7..2f0d2eb 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -135,7 +135,7 @@ define i8* @coerce_mustalias7(i64 %V, i64* %P) {
 define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
 entry:
   %conv = bitcast i16* %A to i8* 
-  tail call void @llvm.memset.i64(i8* %conv, i8 1, i64 200, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
   %arrayidx = getelementptr inbounds i16* %A, i64 42
   %tmp2 = load i16* %arrayidx
   ret i16 %tmp2
@@ -148,7 +148,7 @@ entry:
 define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memset.i64(i8* %conv, i8 %Val, i64 400, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
   %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -168,11 +168,11 @@ define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
   %P3 = bitcast i16* %P to i8*
   br i1 %cond, label %T, label %F
 T:
-  tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)
   br label %Cont
   
 F:
-  tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
+  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)
   br label %Cont
 
 Cont:
@@ -193,7 +193,7 @@ Cont:
 define float @memcpy_to_float_local(float* %A) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
   %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -203,11 +203,6 @@ entry:
 }
 
 
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
-
-
 
 ;; non-local i32/float -> i8 load forwarding.
 define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
@@ -539,7 +534,7 @@ define i32 @memset_to_load() nounwind readnone {
 entry:
   %x = alloca [256 x i32], align 4                ; <[256 x i32]*> [#uses=2]
   %tmp = bitcast [256 x i32]* %x to i8*           ; <i8*> [#uses=1]
-  call void @llvm.memset.i64(i8* %tmp, i8 0, i64 1024, i32 4)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
   %arraydecay = getelementptr inbounds [256 x i32]* %x, i32 0, i32 0 ; <i32*>
   %tmp1 = load i32* %arraydecay                   ; <i32> [#uses=1]
   ret i32 %tmp1
@@ -643,3 +638,7 @@ entry:
 ; CHECK-ret i32
 }
 
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Transforms/GlobalOpt/memcpy.ll b/test/Transforms/GlobalOpt/memcpy.ll
index 8f063a2..94e07a0 100644
--- a/test/Transforms/GlobalOpt/memcpy.ll
+++ b/test/Transforms/GlobalOpt/memcpy.ll
@@ -3,13 +3,11 @@
 
 @G1 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00"         ; <[58 x i8]*> [#uses=1]
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
 define void @foo() {
-        %Blah = alloca [58 x i8]                ; <[58 x i8]*> [#uses=1]
-        %tmp.0 = getelementptr [58 x i8]* %Blah, i32 0, i32 0           ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* %tmp.0, i8* getelementptr ([58 x i8]* @G1, i32 0, i32 0), i32 58, i32 1 )
-        ret void
+  %Blah = alloca [58 x i8]
+  %tmp.0 = getelementptr [58 x i8]* %Blah, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp.0, i8* getelementptr inbounds ([58 x i8]* @G1, i32 0, i32 0), i32 58, i32 1, i1 false)
+  ret void
 }
 
-
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/GlobalOpt/memset.ll b/test/Transforms/GlobalOpt/memset.ll
index a9b9d5e..3bb5ce9 100644
--- a/test/Transforms/GlobalOpt/memset.ll
+++ b/test/Transforms/GlobalOpt/memset.ll
@@ -1,21 +1,18 @@
 ; both globals are write only, delete them.
 
-; RUN: opt < %s -globalopt -S | \
-; RUN:   not grep internal
+; RUN: opt < %s -globalopt -S | not grep internal
 
 @G0 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00"         ; <[58 x i8]*> [#uses=1]
 @G1 = internal global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ]          ; <[4 x i32]*> [#uses=1]
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
 define void @foo() {
-        %Blah = alloca [58 x i8]                ; <[58 x i8]*> [#uses=1]
-        %tmp3 = bitcast [58 x i8]* %Blah to i8*         ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i32 1 )
-        call void @llvm.memset.i32( i8* getelementptr ([58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i32 1 )
-        ret void
+  %Blah = alloca [58 x i8]
+  %tmp3 = bitcast [58 x i8]* %Blah to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false)
+  ret void
 }
 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 961f678..462c29a 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -4,7 +4,7 @@
 define internal void @foo(i32* %p, i32* %q) {
 	%pp = bitcast i32* %p to i8*
 	%qq = bitcast i32* %q to i8*
-	tail call void @llvm.memcpy.i32(i8* %pp, i8* %qq, i32 4, i32 1)
+	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %pp, i8* %qq, i32 4, i32 1, i1 false)
 	ret void
 }
 
@@ -24,12 +24,14 @@ invcont:
 
 lpad:
 	%eh_ptr = call i8* @llvm.eh.exception()
-	%eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
+	%eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
 	unreachable
 }
 
-declare i8* @llvm.eh.exception() nounwind
+declare i8* @llvm.eh.exception() nounwind readonly
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
 
 declare i32 @__gxx_personality_v0(...)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/2006-11-03-Memmove64.ll b/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
deleted file mode 100644
index 35bb45e..0000000
--- a/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:    not grep memmove.i32
-; Instcombine was trying to turn this into a memmove.i32
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-@str10 = internal constant [1 x i8] zeroinitializer             ; <[1 x i8]*> [#uses=1]
-
-define void @do_join(i8* %b) {
-entry:
-        call void @llvm.memmove.i64( i8* %b, i8* getelementptr ([1 x i8]* @str10, i32 0, i64 0), i64 1, i32 1 )
-        ret void
-}
-
-declare void @llvm.memmove.i64(i8*, i8*, i64, i32)
-
diff --git a/test/Transforms/InstCombine/2007-05-04-Crash.ll b/test/Transforms/InstCombine/2007-05-04-Crash.ll
deleted file mode 100644
index 9f50d8a..0000000
--- a/test/Transforms/InstCombine/2007-05-04-Crash.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -instcombine -disable-output
-; PR1384
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-apple-darwin8"
-	%struct.CFRuntimeBase = type { i32, [4 x i8] }
-	%struct.CGColor = type opaque
-	%struct.CGColorSpace = type { %struct.CFRuntimeBase, i8, i8, i8, i32, i32, i32, %struct.CGColor*, float*, %struct.CGMD5Signature, %struct.CGMD5Signature*, [0 x %struct.CGColorSpaceDescriptor] }
-	%struct.CGColorSpaceCalibratedRGBData = type { [3 x float], [3 x float], [3 x float], [9 x float] }
-	%struct.CGColorSpaceDescriptor = type { %struct.CGColorSpaceCalibratedRGBData }
-	%struct.CGColorSpaceLabData = type { [3 x float], [3 x float], [4 x float] }
-	%struct.CGMD5Signature = type { [16 x i8], i8 }
-
-declare fastcc %struct.CGColorSpace* @CGColorSpaceCreate(i32, i32)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define %struct.CGColorSpace* @CGColorSpaceCreateLab(float* %whitePoint, float* %blackPoint, float* %range) {
-entry:
-	%tmp17 = call fastcc %struct.CGColorSpace* @CGColorSpaceCreate( i32 5, i32 3 )		; <%struct.CGColorSpace*> [#uses=2]
-	%tmp28 = getelementptr %struct.CGColorSpace* %tmp17, i32 0, i32 11		; <[0 x %struct.CGColorSpaceDescriptor]*> [#uses=1]
-	%tmp29 = getelementptr [0 x %struct.CGColorSpaceDescriptor]* %tmp28, i32 0, i32 0		; <%struct.CGColorSpaceDescriptor*> [#uses=1]
-	%tmp30 = getelementptr %struct.CGColorSpaceDescriptor* %tmp29, i32 0, i32 0		; <%struct.CGColorSpaceCalibratedRGBData*> [#uses=1]
-	%tmp3031 = bitcast %struct.CGColorSpaceCalibratedRGBData* %tmp30 to %struct.CGColorSpaceLabData*		; <%struct.CGColorSpaceLabData*> [#uses=1]
-	%tmp45 = getelementptr %struct.CGColorSpaceLabData* %tmp3031, i32 0, i32 2		; <[4 x float]*> [#uses=1]
-	%tmp46 = getelementptr [4 x float]* %tmp45, i32 0, i32 0		; <float*> [#uses=1]
-	%tmp4648 = bitcast float* %tmp46 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp4648, i8* null, i32 16, i32 4 )
-	ret %struct.CGColorSpace* %tmp17
-}
diff --git a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index 710aff2..fe935f9 100644
--- a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -6,16 +6,15 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 define void @foo(i8* %P) {
 entry:
-	%P_addr = alloca i8*		; <i8**> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i8* %P, i8** %P_addr
-	%tmp = load i8** %P_addr, align 4		; <i8*> [#uses=1]
-	%tmp1 = getelementptr [4 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp, i8* %tmp1, i32 4, i32 1 )
-	br label %return
+  %P_addr = alloca i8*
+  store i8* %P, i8** %P_addr
+  %tmp = load i8** %P_addr, align 4
+  %tmp1 = getelementptr [4 x i8]* @.str, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i32 1, i1 false)
+  br label %return
 
-return:		; preds = %entry
-	ret void
+return:                                           ; preds = %entry
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index b29d8d2..a51c47d 100644
--- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -4,275 +4,276 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
-	%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" = type { i32* }
-	%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
-	%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
-	%"struct.std::bidirectional_iterator_tag" = type <{ i8 }>
-	%"struct.std::forward_iterator_tag" = type <{ i8 }>
-	%"struct.std::input_iterator_tag" = type <{ i8 }>
-	%"struct.std::random_access_iterator_tag" = type <{ i8 }>
-	%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
+
+%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" = type { i32* }
+%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
+%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"struct.std::bidirectional_iterator_tag" = type <{ i8 }>
+%"struct.std::forward_iterator_tag" = type <{ i8 }>
+%"struct.std::input_iterator_tag" = type <{ i8 }>
+%"struct.std::random_access_iterator_tag" = type <{ i8 }>
+%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
 
 define i32* @_Z3fooRSt6vectorIiSaIiEE(%"struct.std::vector<int,std::allocator<int> >"* %X) {
 entry:
-	%0 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
-	%__first_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=31]
-	%__last_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=4]
-	%unnamed_arg.i = alloca %"struct.std::bidirectional_iterator_tag", align 8		; <%"struct.std::bidirectional_iterator_tag"*> [#uses=1]
-	%1 = alloca %"struct.std::bidirectional_iterator_tag"		; <%"struct.std::bidirectional_iterator_tag"*> [#uses=1]
-	%__first_addr.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
-	%2 = alloca %"struct.std::bidirectional_iterator_tag"		; <%"struct.std::bidirectional_iterator_tag"*> [#uses=2]
-	%3 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"		; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
-	%4 = alloca i32		; <i32*> [#uses=8]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i32 42, i32* %4, align 4
-	%5 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0		; <%"struct.std::_Vector_base<int,std::allocator<int> >"*> [#uses=1]
-	%6 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %5, i32 0, i32 0		; <%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"*> [#uses=1]
-	%7 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %6, i32 0, i32 1		; <i32**> [#uses=1]
-	%8 = load i32** %7, align 4		; <i32*> [#uses=1]
-	%9 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %8, i32** %9, align 4
-	%10 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0		; <i32**> [#uses=1]
-	%11 = load i32** %10, align 4		; <i32*> [#uses=1]
-	%tmp2.i = ptrtoint i32* %11 to i32		; <i32> [#uses=1]
-	%tmp1.i = inttoptr i32 %tmp2.i to i32*		; <i32*> [#uses=1]
-	%tmp3 = ptrtoint i32* %tmp1.i to i32		; <i32> [#uses=1]
-	%tmp2 = inttoptr i32 %tmp3 to i32*		; <i32*> [#uses=1]
-	%12 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0		; <%"struct.std::_Vector_base<int,std::allocator<int> >"*> [#uses=1]
-	%13 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %12, i32 0, i32 0		; <%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"*> [#uses=1]
-	%14 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %13, i32 0, i32 0		; <i32**> [#uses=1]
-	%15 = load i32** %14, align 4		; <i32*> [#uses=1]
-	%16 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %15, i32** %16, align 4
-	%17 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0		; <i32**> [#uses=1]
-	%18 = load i32** %17, align 4		; <i32*> [#uses=1]
-	%tmp2.i17 = ptrtoint i32* %18 to i32		; <i32> [#uses=1]
-	%tmp1.i18 = inttoptr i32 %tmp2.i17 to i32*		; <i32*> [#uses=1]
-	%tmp8 = ptrtoint i32* %tmp1.i18 to i32		; <i32> [#uses=1]
-	%tmp6 = inttoptr i32 %tmp8 to i32*		; <i32*> [#uses=1]
-	%19 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %tmp6, i32** %19
-	%20 = getelementptr %"struct.std::bidirectional_iterator_tag"* %1, i32 0, i32 0		; <i8*> [#uses=1]
-	%21 = load i8* %20, align 1		; <i8> [#uses=1]
-	%22 = or i8 %21, 0		; <i8> [#uses=1]
-	%23 = or i8 %22, 0		; <i8> [#uses=1]
-	%24 = or i8 %23, 0		; <i8> [#uses=0]
-	%25 = getelementptr %"struct.std::bidirectional_iterator_tag"* %2, i32 0, i32 0		; <i8*> [#uses=1]
-	store i8 0, i8* %25, align 1
-	%elt.i = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%val.i = load i32** %elt.i		; <i32*> [#uses=1]
-	%tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*		; <i8*> [#uses=1]
-	%tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1)
-	%26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %val.i, i32** %26
-	%27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %tmp2, i32** %27
-	%28 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%29 = load i32** %28, align 4		; <i32*> [#uses=1]
-	%30 = ptrtoint i32* %29 to i32		; <i32> [#uses=1]
-	%31 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%32 = load i32** %31, align 4		; <i32*> [#uses=1]
-	%33 = ptrtoint i32* %32 to i32		; <i32> [#uses=1]
-	%34 = sub i32 %30, %33		; <i32> [#uses=1]
-	%35 = ashr i32 %34, 2		; <i32> [#uses=1]
-	%36 = ashr i32 %35, 2		; <i32> [#uses=1]
-	br label %bb12.i.i
+  %0 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %__first_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %__last_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %unnamed_arg.i = alloca %"struct.std::bidirectional_iterator_tag", align 8
+  %1 = alloca %"struct.std::bidirectional_iterator_tag"
+  %__first_addr.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %2 = alloca %"struct.std::bidirectional_iterator_tag"
+  %3 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+  %4 = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  store i32 42, i32* %4, align 4
+  %5 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
+  %6 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %5, i32 0, i32 0
+  %7 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %6, i32 0, i32 1
+  %8 = load i32** %7, align 4
+  %9 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
+  store i32* %8, i32** %9, align 4
+  %10 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
+  %11 = load i32** %10, align 4
+  %tmp2.i = ptrtoint i32* %11 to i32
+  %tmp1.i = inttoptr i32 %tmp2.i to i32*
+  %tmp3 = ptrtoint i32* %tmp1.i to i32
+  %tmp2 = inttoptr i32 %tmp3 to i32*
+  %12 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
+  %13 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %12, i32 0, i32 0
+  %14 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %13, i32 0, i32 0
+  %15 = load i32** %14, align 4
+  %16 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
+  store i32* %15, i32** %16, align 4
+  %17 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
+  %18 = load i32** %17, align 4
+  %tmp2.i17 = ptrtoint i32* %18 to i32
+  %tmp1.i18 = inttoptr i32 %tmp2.i17 to i32*
+  %tmp8 = ptrtoint i32* %tmp1.i18 to i32
+  %tmp6 = inttoptr i32 %tmp8 to i32*
+  %19 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
+  store i32* %tmp6, i32** %19
+  %20 = getelementptr %"struct.std::bidirectional_iterator_tag"* %1, i32 0, i32 0
+  %21 = load i8* %20, align 1
+  %22 = or i8 %21, 0
+  %23 = or i8 %22, 0
+  %24 = or i8 %23, 0
+  %25 = getelementptr %"struct.std::bidirectional_iterator_tag"* %2, i32 0, i32 0
+  store i8 0, i8* %25, align 1
+  %elt.i = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
+  %val.i = load i32** %elt.i
+  %tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*
+  %tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1, i1 false)
+  %26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %val.i, i32** %26
+  %27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+  store i32* %tmp2, i32** %27
+  %28 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+  %29 = load i32** %28, align 4
+  %30 = ptrtoint i32* %29 to i32
+  %31 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %32 = load i32** %31, align 4
+  %33 = ptrtoint i32* %32 to i32
+  %34 = sub i32 %30, %33
+  %35 = ashr i32 %34, 2
+  %36 = ashr i32 %35, 2
+  br label %bb12.i.i
 
-bb.i.i:		; preds = %bb12.i.i
-	%37 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%38 = load i32** %37, align 4		; <i32*> [#uses=1]
-	%39 = load i32* %38, align 4		; <i32> [#uses=1]
-	%40 = load i32* %4, align 4		; <i32> [#uses=1]
-	%41 = icmp eq i32 %39, %40		; <i1> [#uses=1]
-	%42 = zext i1 %41 to i8		; <i8> [#uses=1]
-	%toBool.i.i = icmp ne i8 %42, 0		; <i1> [#uses=1]
-	br i1 %toBool.i.i, label %bb1.i.i, label %bb2.i.i
+bb.i.i:                                           ; preds = %bb12.i.i
+  %37 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %38 = load i32** %37, align 4
+  %39 = load i32* %38, align 4
+  %40 = load i32* %4, align 4
+  %41 = icmp eq i32 %39, %40
+  %42 = zext i1 %41 to i8
+  %toBool.i.i = icmp ne i8 %42, 0
+  br i1 %toBool.i.i, label %bb1.i.i, label %bb2.i.i
 
-bb1.i.i:		; preds = %bb.i.i
-	%43 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%44 = load i32** %43, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb1.i.i:                                          ; preds = %bb.i.i
+  %43 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %44 = load i32** %43, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb2.i.i:		; preds = %bb.i.i
-	%45 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%46 = load i32** %45, align 4		; <i32*> [#uses=1]
-	%47 = getelementptr i32* %46, i64 1		; <i32*> [#uses=1]
-	%48 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %47, i32** %48, align 4
-	%49 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%50 = load i32** %49, align 4		; <i32*> [#uses=1]
-	%51 = load i32* %50, align 4		; <i32> [#uses=1]
-	%52 = load i32* %4, align 4		; <i32> [#uses=1]
-	%53 = icmp eq i32 %51, %52		; <i1> [#uses=1]
-	%54 = zext i1 %53 to i8		; <i8> [#uses=1]
-	%toBool3.i.i = icmp ne i8 %54, 0		; <i1> [#uses=1]
-	br i1 %toBool3.i.i, label %bb4.i.i, label %bb5.i.i
+bb2.i.i:                                          ; preds = %bb.i.i
+  %45 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %46 = load i32** %45, align 4
+  %47 = getelementptr i32* %46, i64 1
+  %48 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %47, i32** %48, align 4
+  %49 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %50 = load i32** %49, align 4
+  %51 = load i32* %50, align 4
+  %52 = load i32* %4, align 4
+  %53 = icmp eq i32 %51, %52
+  %54 = zext i1 %53 to i8
+  %toBool3.i.i = icmp ne i8 %54, 0
+  br i1 %toBool3.i.i, label %bb4.i.i, label %bb5.i.i
 
-bb4.i.i:		; preds = %bb2.i.i
-	%55 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%56 = load i32** %55, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb4.i.i:                                          ; preds = %bb2.i.i
+  %55 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %56 = load i32** %55, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb5.i.i:		; preds = %bb2.i.i
-	%57 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%58 = load i32** %57, align 4		; <i32*> [#uses=1]
-	%59 = getelementptr i32* %58, i64 1		; <i32*> [#uses=1]
-	%60 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %59, i32** %60, align 4
-	%61 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%62 = load i32** %61, align 4		; <i32*> [#uses=1]
-	%63 = load i32* %62, align 4		; <i32> [#uses=1]
-	%64 = load i32* %4, align 4		; <i32> [#uses=1]
-	%65 = icmp eq i32 %63, %64		; <i1> [#uses=1]
-	%66 = zext i1 %65 to i8		; <i8> [#uses=1]
-	%toBool6.i.i = icmp ne i8 %66, 0		; <i1> [#uses=1]
-	br i1 %toBool6.i.i, label %bb7.i.i, label %bb8.i.i
+bb5.i.i:                                          ; preds = %bb2.i.i
+  %57 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %58 = load i32** %57, align 4
+  %59 = getelementptr i32* %58, i64 1
+  %60 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %59, i32** %60, align 4
+  %61 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %62 = load i32** %61, align 4
+  %63 = load i32* %62, align 4
+  %64 = load i32* %4, align 4
+  %65 = icmp eq i32 %63, %64
+  %66 = zext i1 %65 to i8
+  %toBool6.i.i = icmp ne i8 %66, 0
+  br i1 %toBool6.i.i, label %bb7.i.i, label %bb8.i.i
 
-bb7.i.i:		; preds = %bb5.i.i
-	%67 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%68 = load i32** %67, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb7.i.i:                                          ; preds = %bb5.i.i
+  %67 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %68 = load i32** %67, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb8.i.i:		; preds = %bb5.i.i
-	%69 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%70 = load i32** %69, align 4		; <i32*> [#uses=1]
-	%71 = getelementptr i32* %70, i64 1		; <i32*> [#uses=1]
-	%72 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %71, i32** %72, align 4
-	%73 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%74 = load i32** %73, align 4		; <i32*> [#uses=1]
-	%75 = load i32* %74, align 4		; <i32> [#uses=1]
-	%76 = load i32* %4, align 4		; <i32> [#uses=1]
-	%77 = icmp eq i32 %75, %76		; <i1> [#uses=1]
-	%78 = zext i1 %77 to i8		; <i8> [#uses=1]
-	%toBool9.i.i = icmp ne i8 %78, 0		; <i1> [#uses=1]
-	br i1 %toBool9.i.i, label %bb10.i.i, label %bb11.i.i
+bb8.i.i:                                          ; preds = %bb5.i.i
+  %69 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %70 = load i32** %69, align 4
+  %71 = getelementptr i32* %70, i64 1
+  %72 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %71, i32** %72, align 4
+  %73 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %74 = load i32** %73, align 4
+  %75 = load i32* %74, align 4
+  %76 = load i32* %4, align 4
+  %77 = icmp eq i32 %75, %76
+  %78 = zext i1 %77 to i8
+  %toBool9.i.i = icmp ne i8 %78, 0
+  br i1 %toBool9.i.i, label %bb10.i.i, label %bb11.i.i
 
-bb10.i.i:		; preds = %bb8.i.i
-	%79 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%80 = load i32** %79, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb10.i.i:                                         ; preds = %bb8.i.i
+  %79 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %80 = load i32** %79, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb11.i.i:		; preds = %bb8.i.i
-	%81 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%82 = load i32** %81, align 4		; <i32*> [#uses=1]
-	%83 = getelementptr i32* %82, i64 1		; <i32*> [#uses=1]
-	%84 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %83, i32** %84, align 4
-	%85 = sub i32 %__trip_count.0.i.i, 1		; <i32> [#uses=1]
-	br label %bb12.i.i
+bb11.i.i:                                         ; preds = %bb8.i.i
+  %81 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %82 = load i32** %81, align 4
+  %83 = getelementptr i32* %82, i64 1
+  %84 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %83, i32** %84, align 4
+  %85 = sub i32 %__trip_count.0.i.i, 1
+  br label %bb12.i.i
 
-bb12.i.i:		; preds = %bb11.i.i, %entry
-	%__trip_count.0.i.i = phi i32 [ %36, %entry ], [ %85, %bb11.i.i ]		; <i32> [#uses=2]
-	%86 = icmp sgt i32 %__trip_count.0.i.i, 0		; <i1> [#uses=1]
-	br i1 %86, label %bb.i.i, label %bb13.i.i
+bb12.i.i:                                         ; preds = %bb11.i.i, %entry
+  %__trip_count.0.i.i = phi i32 [ %36, %entry ], [ %85, %bb11.i.i ]
+  %86 = icmp sgt i32 %__trip_count.0.i.i, 0
+  br i1 %86, label %bb.i.i, label %bb13.i.i
 
-bb13.i.i:		; preds = %bb12.i.i
-	%87 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%88 = load i32** %87, align 4		; <i32*> [#uses=1]
-	%89 = ptrtoint i32* %88 to i32		; <i32> [#uses=1]
-	%90 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%91 = load i32** %90, align 4		; <i32*> [#uses=1]
-	%92 = ptrtoint i32* %91 to i32		; <i32> [#uses=1]
-	%93 = sub i32 %89, %92		; <i32> [#uses=1]
-	%94 = ashr i32 %93, 2		; <i32> [#uses=1]
-	switch i32 %94, label %bb26.i.i [
-		i32 1, label %bb22.i.i
-		i32 2, label %bb18.i.i
-		i32 3, label %bb14.i.i
-	]
+bb13.i.i:                                         ; preds = %bb12.i.i
+  %87 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+  %88 = load i32** %87, align 4
+  %89 = ptrtoint i32* %88 to i32
+  %90 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %91 = load i32** %90, align 4
+  %92 = ptrtoint i32* %91 to i32
+  %93 = sub i32 %89, %92
+  %94 = ashr i32 %93, 2
+  switch i32 %94, label %bb26.i.i [
+    i32 1, label %bb22.i.i
+    i32 2, label %bb18.i.i
+    i32 3, label %bb14.i.i
+  ]
 
-bb14.i.i:		; preds = %bb13.i.i
-	%95 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%96 = load i32** %95, align 4		; <i32*> [#uses=1]
-	%97 = load i32* %96, align 4		; <i32> [#uses=1]
-	%98 = load i32* %4, align 4		; <i32> [#uses=1]
-	%99 = icmp eq i32 %97, %98		; <i1> [#uses=1]
-	%100 = zext i1 %99 to i8		; <i8> [#uses=1]
-	%toBool15.i.i = icmp ne i8 %100, 0		; <i1> [#uses=1]
-	br i1 %toBool15.i.i, label %bb16.i.i, label %bb17.i.i
+bb14.i.i:                                         ; preds = %bb13.i.i
+  %95 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %96 = load i32** %95, align 4
+  %97 = load i32* %96, align 4
+  %98 = load i32* %4, align 4
+  %99 = icmp eq i32 %97, %98
+  %100 = zext i1 %99 to i8
+  %toBool15.i.i = icmp ne i8 %100, 0
+  br i1 %toBool15.i.i, label %bb16.i.i, label %bb17.i.i
 
-bb16.i.i:		; preds = %bb14.i.i
-	%101 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%102 = load i32** %101, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb16.i.i:                                         ; preds = %bb14.i.i
+  %101 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %102 = load i32** %101, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb17.i.i:		; preds = %bb14.i.i
-	%103 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%104 = load i32** %103, align 4		; <i32*> [#uses=1]
-	%105 = getelementptr i32* %104, i64 1		; <i32*> [#uses=1]
-	%106 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %105, i32** %106, align 4
-	br label %bb18.i.i
+bb17.i.i:                                         ; preds = %bb14.i.i
+  %103 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %104 = load i32** %103, align 4
+  %105 = getelementptr i32* %104, i64 1
+  %106 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %105, i32** %106, align 4
+  br label %bb18.i.i
 
-bb18.i.i:		; preds = %bb17.i.i, %bb13.i.i
-	%107 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%108 = load i32** %107, align 4		; <i32*> [#uses=1]
-	%109 = load i32* %108, align 4		; <i32> [#uses=1]
-	%110 = load i32* %4, align 4		; <i32> [#uses=1]
-	%111 = icmp eq i32 %109, %110		; <i1> [#uses=1]
-	%112 = zext i1 %111 to i8		; <i8> [#uses=1]
-	%toBool19.i.i = icmp ne i8 %112, 0		; <i1> [#uses=1]
-	br i1 %toBool19.i.i, label %bb20.i.i, label %bb21.i.i
+bb18.i.i:                                         ; preds = %bb17.i.i, %bb13.i.i
+  %107 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %108 = load i32** %107, align 4
+  %109 = load i32* %108, align 4
+  %110 = load i32* %4, align 4
+  %111 = icmp eq i32 %109, %110
+  %112 = zext i1 %111 to i8
+  %toBool19.i.i = icmp ne i8 %112, 0
+  br i1 %toBool19.i.i, label %bb20.i.i, label %bb21.i.i
 
-bb20.i.i:		; preds = %bb18.i.i
-	%113 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%114 = load i32** %113, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb20.i.i:                                         ; preds = %bb18.i.i
+  %113 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %114 = load i32** %113, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb21.i.i:		; preds = %bb18.i.i
-	%115 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%116 = load i32** %115, align 4		; <i32*> [#uses=1]
-	%117 = getelementptr i32* %116, i64 1		; <i32*> [#uses=1]
-	%118 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %117, i32** %118, align 4
-	br label %bb22.i.i
+bb21.i.i:                                         ; preds = %bb18.i.i
+  %115 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %116 = load i32** %115, align 4
+  %117 = getelementptr i32* %116, i64 1
+  %118 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %117, i32** %118, align 4
+  br label %bb22.i.i
 
-bb22.i.i:		; preds = %bb21.i.i, %bb13.i.i
-	%119 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%120 = load i32** %119, align 4		; <i32*> [#uses=1]
-	%121 = load i32* %120, align 4		; <i32> [#uses=1]
-	%122 = load i32* %4, align 4		; <i32> [#uses=1]
-	%123 = icmp eq i32 %121, %122		; <i1> [#uses=1]
-	%124 = zext i1 %123 to i8		; <i8> [#uses=1]
-	%toBool23.i.i = icmp ne i8 %124, 0		; <i1> [#uses=1]
-	br i1 %toBool23.i.i, label %bb24.i.i, label %bb25.i.i
+bb22.i.i:                                         ; preds = %bb21.i.i, %bb13.i.i
+  %119 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %120 = load i32** %119, align 4
+  %121 = load i32* %120, align 4
+  %122 = load i32* %4, align 4
+  %123 = icmp eq i32 %121, %122
+  %124 = zext i1 %123 to i8
+  %toBool23.i.i = icmp ne i8 %124, 0
+  br i1 %toBool23.i.i, label %bb24.i.i, label %bb25.i.i
 
-bb24.i.i:		; preds = %bb22.i.i
-	%125 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%126 = load i32** %125, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb24.i.i:                                         ; preds = %bb22.i.i
+  %125 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %126 = load i32** %125, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-bb25.i.i:		; preds = %bb22.i.i
-	%127 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%128 = load i32** %127, align 4		; <i32*> [#uses=1]
-	%129 = getelementptr i32* %128, i64 1		; <i32*> [#uses=1]
-	%130 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	store i32* %129, i32** %130, align 4
-	br label %bb26.i.i
+bb25.i.i:                                         ; preds = %bb22.i.i
+  %127 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  %128 = load i32** %127, align 4
+  %129 = getelementptr i32* %128, i64 1
+  %130 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+  store i32* %129, i32** %130, align 4
+  br label %bb26.i.i
 
-bb26.i.i:		; preds = %bb25.i.i, %bb13.i.i
-	%131 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0		; <i32**> [#uses=1]
-	%132 = load i32** %131, align 4		; <i32*> [#uses=1]
-	br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb26.i.i:                                         ; preds = %bb25.i.i, %bb13.i.i
+  %131 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+  %132 = load i32** %131, align 4
+  br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
-_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit:		; preds = %bb26.i.i, %bb24.i.i, %bb20.i.i, %bb16.i.i, %bb10.i.i, %bb7.i.i, %bb4.i.i, %bb1.i.i
-	%.0.0.i.i = phi i32* [ %132, %bb26.i.i ], [ %126, %bb24.i.i ], [ %114, %bb20.i.i ], [ %102, %bb16.i.i ], [ %80, %bb10.i.i ], [ %68, %bb7.i.i ], [ %56, %bb4.i.i ], [ %44, %bb1.i.i ]		; <i32*> [#uses=1]
-	%tmp2.i.i = ptrtoint i32* %.0.0.i.i to i32		; <i32> [#uses=1]
-	%tmp1.i.i = inttoptr i32 %tmp2.i.i to i32*		; <i32*> [#uses=1]
-	%tmp4.i = ptrtoint i32* %tmp1.i.i to i32		; <i32> [#uses=1]
-	%tmp3.i = inttoptr i32 %tmp4.i to i32*		; <i32*> [#uses=1]
-	%tmp8.i = ptrtoint i32* %tmp3.i to i32		; <i32> [#uses=1]
-	%tmp6.i = inttoptr i32 %tmp8.i to i32*		; <i32*> [#uses=1]
-	%tmp12 = ptrtoint i32* %tmp6.i to i32		; <i32> [#uses=1]
-	%tmp10 = inttoptr i32 %tmp12 to i32*		; <i32*> [#uses=1]
-	%tmp16 = ptrtoint i32* %tmp10 to i32		; <i32> [#uses=1]
-	br label %return
+_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit: ; preds = %bb26.i.i, %bb24.i.i, %bb20.i.i, %bb16.i.i, %bb10.i.i, %bb7.i.i, %bb4.i.i, %bb1.i.i
+  %.0.0.i.i = phi i32* [ %132, %bb26.i.i ], [ %126, %bb24.i.i ], [ %114, %bb20.i.i ], [ %102, %bb16.i.i ], [ %80, %bb10.i.i ], [ %68, %bb7.i.i ], [ %56, %bb4.i.i ], [ %44, %bb1.i.i ]
+  %tmp2.i.i = ptrtoint i32* %.0.0.i.i to i32
+  %tmp1.i.i = inttoptr i32 %tmp2.i.i to i32*
+  %tmp4.i = ptrtoint i32* %tmp1.i.i to i32
+  %tmp3.i = inttoptr i32 %tmp4.i to i32*
+  %tmp8.i = ptrtoint i32* %tmp3.i to i32
+  %tmp6.i = inttoptr i32 %tmp8.i to i32*
+  %tmp12 = ptrtoint i32* %tmp6.i to i32
+  %tmp10 = inttoptr i32 %tmp12 to i32*
+  %tmp16 = ptrtoint i32* %tmp10 to i32
+  br label %return
 
-return:		; preds = %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-	%tmp14 = inttoptr i32 %tmp16 to i32*		; <i32*> [#uses=1]
-	ret i32* %tmp14
+return:                                           ; preds = %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+  %tmp14 = inttoptr i32 %tmp16 to i32*
+  ret i32* %tmp14
 }
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/memcpy-to-load.ll b/test/Transforms/InstCombine/memcpy-to-load.ll
index ebb8711..04aac98 100644
--- a/test/Transforms/InstCombine/memcpy-to-load.ll
+++ b/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -4,11 +4,10 @@ target triple = "i686-apple-darwin8"
 
 define void @foo(double* %X, double* %Y) {
 entry:
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp2 = bitcast double* %X to i8*		; <i8*> [#uses=1]
-	%tmp13 = bitcast double* %Y to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp2, i8* %tmp13, i32 8, i32 1 )
-	ret void
+  %tmp2 = bitcast double* %X to i8*
+  %tmp13 = bitcast double* %Y to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 1, i1 false)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/memmove.ll b/test/Transforms/InstCombine/memmove.ll
index 1806cfc..4602c12 100644
--- a/test/Transforms/InstCombine/memmove.ll
+++ b/test/Transforms/InstCombine/memmove.ll
@@ -1,24 +1,20 @@
 ; This test makes sure that memmove instructions are properly eliminated.
 ;
-; RUN: opt < %s -instcombine -S | \
-; RUN:    not grep {call void @llvm.memmove}
+; RUN: opt < %s -instcombine -S | not grep {call void @llvm.memmove}
 
 @S = internal constant [33 x i8] c"panic: restorelist inconsistency\00"		; <[33 x i8]*> [#uses=1]
 @h = constant [2 x i8] c"h\00"		; <[2 x i8]*> [#uses=1]
 @hel = constant [4 x i8] c"hel\00"		; <[4 x i8]*> [#uses=1]
 @hello_u = constant [8 x i8] c"hello_u\00"		; <[8 x i8]*> [#uses=1]
 
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
 define void @test1(i8* %A, i8* %B, i32 %N) {
-	call void @llvm.memmove.i32( i8* %A, i8* %B, i32 0, i32 1 )
+	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i32 1, i1 false)
 	ret void
 }
 
 define void @test2(i8* %A, i32 %N) {
         ;; dest can't alias source since we can't write to source!
-	call void @llvm.memmove.i32( i8* %A, i8* getelementptr ([33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1 )
+	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1, i1 false)
 	ret void
 }
 
@@ -28,15 +24,16 @@ define i32 @test3() {
 	%hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0		; <i8*> [#uses=1]
 	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
 	%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=3]
-	call void @llvm.memmove.i32( i8* %target_p, i8* %h_p, i32 2, i32 2 )
-	call void @llvm.memmove.i32( i8* %target_p, i8* %hel_p, i32 4, i32 4 )
-	call void @llvm.memmove.i32( i8* %target_p, i8* %hello_u_p, i32 8, i32 8 )
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
 	ret i32 0
 }
 
 ; PR2370
 define void @test4(i8* %a) {
-        tail call void @llvm.memmove.i32( i8* %a, i8* %a, i32 100, i32 1 )
-        ret void
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
+  ret void
 }
 
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/memset.ll b/test/Transforms/InstCombine/memset.ll
index 8e85694..7f7bc9f 100644
--- a/test/Transforms/InstCombine/memset.ll
+++ b/test/Transforms/InstCombine/memset.ll
@@ -1,15 +1,14 @@
 ; RUN: opt < %s -instcombine -S | not grep {call.*llvm.memset}
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
 define i32 @main() {
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=5]
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 0, i32 1 )
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 1, i32 1 )
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 2, i32 2 )
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 4, i32 4 )
-	call void @llvm.memset.i32( i8* %target_p, i8 1, i32 8, i32 8 )
-	ret i32 0
+  %target = alloca [1024 x i8]
+  %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 2, i32 2, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 4, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 8, i32 8, i1 false)
+  ret i32 0
 }
 
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/stack-overalign.ll b/test/Transforms/InstCombine/stack-overalign.ll
index 88b4114..2fc8414 100644
--- a/test/Transforms/InstCombine/stack-overalign.ll
+++ b/test/Transforms/InstCombine/stack-overalign.ll
@@ -17,13 +17,13 @@
 
 define void @foo() nounwind {
 entry:
-	%src = alloca [1024 x i8], align 1
-	%src1 = getelementptr [1024 x i8]* %src, i32 0, i32 0
-	call void @llvm.memcpy.i32(i8* getelementptr ([1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i32 1)
-	call void @frob(i8* %src1) nounwind
-	ret void
+  %src = alloca [1024 x i8], align 1
+  %src1 = getelementptr [1024 x i8]* %src, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i32 1, i1 false)
+  call void @frob(i8* %src1) nounwind
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
 declare void @frob(i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index 9f1e280..b95ad91 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -4,31 +4,33 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
 
-define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret  %agg.result) nounwind  {
+%0 = type { x86_fp80, x86_fp80 }
+
+define internal fastcc void @initialize(%0* noalias sret %agg.result) nounwind {
 entry:
-	%agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
-	%agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
-	ret void
+  %agg.result.03 = getelementptr %0* %agg.result, i32 0, i32 0
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+  %agg.result.15 = getelementptr %0* %agg.result, i32 0, i32 1
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+  ret void
 }
 
-declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+declare fastcc x86_fp80 @passed_uninitialized(%0*) nounwind
 
-define fastcc void @badly_optimized() nounwind  {
+define fastcc void @badly_optimized() nounwind {
 entry:
-	%z = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 8		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret  %memtmp )
-	%tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*		; <i8*> [#uses=1]
-	%memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
-	%z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8*		; <i8*> [#uses=1]
-	%tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
-	%tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z )		; <x86_fp80> [#uses=0]
-	ret void
+  %z = alloca %0
+  %tmp = alloca %0
+  %memtmp = alloca %0, align 8
+  call fastcc void @initialize(%0* noalias sret %memtmp)
+  %tmp1 = bitcast %0* %tmp to i8*
+  %memtmp2 = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i32 8, i1 false)
+  %z3 = bitcast %0* %z to i8*
+  %tmp4 = bitcast %0* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i32 8, i1 false)
+  %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 418761e..24cf576 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,20 +1,22 @@
 ; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.}
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-	%a = type { i32 }
-	%b = type { float }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+%a = type { i32 }
+%b = type { float }
+
 declare void @g(%a*)
 
 define float @f() {
 entry:
-	%a_var = alloca %a
-	%b_var = alloca %b
-	call void @g(%a *%a_var)
-	%a_i8 = bitcast %a* %a_var to i8*
-	%b_i8 = bitcast %b* %b_var to i8*
-	call void @llvm.memcpy.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4)
-	%tmp1 = getelementptr %b* %b_var, i32 0, i32 0
-	%tmp2 = load float* %tmp1
-	ret float %tmp2
+  %a_var = alloca %a
+  %b_var = alloca %b
+  call void @g(%a* %a_var)
+  %a_i8 = bitcast %a* %a_var to i8*
+  %b_i8 = bitcast %b* %b_var to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4, i1 false)
+  %tmp1 = getelementptr %b* %b_var, i32 0, i32 0
+  %tmp2 = load float* %tmp1
+  ret float %tmp2
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index fb97913..12519ef 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -3,17 +3,21 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
+%0 = type { x86_fp80, x86_fp80 }
+%1 = type { i32, i32 }
+
 define void @test1({ x86_fp80, x86_fp80 }* sret  %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind  {
 entry:
-	%tmp2 = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=1]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1		; <x86_fp80> [#uses=1]
-	call void @ccoshl( { x86_fp80, x86_fp80 }* sret  %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind 
-	%tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8*		; <i8*> [#uses=2]
-	%memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 )
-	%agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 )
+  %tmp2 = alloca %0
+  %memtmp = alloca %0, align 16
+  %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1
+  call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind
+  %tmp219 = bitcast %0* %tmp2 to i8*
+  %memtmp20 = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp219, i8* %memtmp20, i32 32, i32 16, i1 false)
+  %agg.result21 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result21, i8* %tmp219, i32 32, i32 16, i1 false)
+  ret void
 
 ; Check that one of the memcpy's are removed.
 ;; FIXME: PR 8643 We should be able to eliminate the last memcpy here.
@@ -23,22 +27,19 @@ entry:
 ; CHECK: call void @llvm.memcpy
 ; CHECK-NOT: llvm.memcpy
 ; CHECK: ret void
-	ret void
 }
 
 declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind 
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
-
 
 ; The intermediate alloca and one of the memcpy's should be eliminated, the
 ; other should be related with a memmove.
 define void @test2(i8* %P, i8* %Q) nounwind  {
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16
-	%R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
-	call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
-	call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
-        ret void
+  %memtmp = alloca %0, align 16
+  %R = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
+  ret void
         
 ; CHECK: @test2
 ; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
@@ -51,12 +52,12 @@ define void @test2(i8* %P, i8* %Q) nounwind  {
 @x = external global { x86_fp80, x86_fp80 }
 
 define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
-	%x.0 = alloca { x86_fp80, x86_fp80 }
-	%x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
-	call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
-	%agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
-	call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
-	ret void
+  %x.0 = alloca %0
+  %x.01 = bitcast %0* %x.0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false)
+  %agg.result2 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
+  ret void
 ; CHECK: @test3
 ; CHECK-NEXT: %agg.result2 = bitcast 
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -66,10 +67,10 @@ define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
 
 ; PR8644
 define void @test4(i8 *%P) {
-  %A = alloca {i32, i32}
-  %a = bitcast {i32, i32}* %A to i8*
+  %A = alloca %1
+  %a = bitcast %1* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
-  call void @test4a(i8* byval align 1 %a) 
+  call void @test4a(i8* byval align 1 %a)
   ret void
 ; CHECK: @test4
 ; CHECK-NEXT: call void @test4a(
@@ -127,4 +128,5 @@ entry:
 
 declare i32 @g(%struct.p* byval align 8)
 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll
index 8d3fbd2..7f1667a 100644
--- a/test/Transforms/MemCpyOpt/memmove.ll
+++ b/test/Transforms/MemCpyOpt/memmove.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
 
-declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define i8* @test1(i8* nocapture %src) nounwind {
 entry:
@@ -13,8 +13,8 @@ entry:
 
   %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8* null, i32 1) to i64), i64 13) to i32))
   %call3 = bitcast i8* %malloccall to [13 x i8]*
-  %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; <i8*> [#uses=2]
-  tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1)
+  %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i32 1, i1 false)
   ret i8* %call3.sub
 }
 declare noalias i8* @malloc(i32)
@@ -24,8 +24,8 @@ define void @test2(i8* %P) nounwind {
 entry:
 ; CHECK: @test2
 ; CHECK: call void @llvm.memcpy
-  %add.ptr = getelementptr i8* %P, i64 16         ; <i8*> [#uses=1]
-  tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1)
+  %add.ptr = getelementptr i8* %P, i64 16
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false)
   ret void
 }
 
@@ -34,7 +34,7 @@ define void @test3(i8* %P) nounwind {
 entry:
 ; CHECK: @test3
 ; CHECK: call void @llvm.memmove
-  %add.ptr = getelementptr i8* %P, i64 16         ; <i8*> [#uses=1]
-  tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1)
+  %add.ptr = getelementptr i8* %P, i64 16
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false)
   ret void
 }
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index ddfd0fd..8eac7da 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -3,26 +3,28 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  align 8 %z) nounwind  {
+%0 = type { x86_fp80, x86_fp80 }
+
+define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind {
 entry:
-	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
-	%tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
-	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	%tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	%tmp8 = load x86_fp80* %tmp7, align 16		; <x86_fp80> [#uses=1]
-	store x86_fp80 %tmp3, x86_fp80* %real, align 16
-	store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
-	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind 
-	%memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	%agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
-	ret void
+  %iz = alloca %0
+  %memtmp = alloca %0, align 16
+  %tmp1 = getelementptr %0* %z, i32 0, i32 1
+  %tmp2 = load x86_fp80* %tmp1, align 16
+  %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
+  %tmp4 = getelementptr %0* %iz, i32 0, i32 1
+  %real = getelementptr %0* %iz, i32 0, i32 0
+  %tmp7 = getelementptr %0* %z, i32 0, i32 0
+  %tmp8 = load x86_fp80* %tmp7, align 16
+  store x86_fp80 %tmp3, x86_fp80* %real, align 16
+  store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
+  call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
+  %memtmp14 = bitcast %0* %memtmp to i8*
+  %agg.result15 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result15, i8* %memtmp14, i32 32, i32 16, i1 false)
+  ret void
 }
 
-declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind 
+declare void @ccoshl(%0* noalias sret, %0* byval) nounwind
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll b/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
deleted file mode 100644
index e67b610..0000000
--- a/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep {alloca.*client_t}
-; PR1446
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-pc-linux-gnu"
-
-	%struct.clientSnapshot_t = type { i32, [32 x i8], %struct.playerState_t, i32, i32, i32, i32, i32 }
-	%struct.client_t = type { i32, [1024 x i8], [64 x [1024 x i8]], i32, i32, i32, i32, i32, i32, %struct.usercmd_t, i32, i32, [1024 x i8], %struct.sharedEntity_t*, [32 x i8], [64 x i8], i32, i32, i32, i32, i32, i32, [8 x i8*], [8 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, [32 x %struct.clientSnapshot_t], i32, i32, i32, i32, i32, %struct.netchan_t, %struct.netchan_buffer_t*, %struct.netchan_buffer_t**, i32, [1025 x i32] }
-	%struct.entityShared_t = type { %struct.entityState_t, i32, i32, i32, i32, i32, [3 x float], [3 x float], i32, [3 x float], [3 x float], [3 x float], [3 x float], i32 }
-	%struct.entityState_t = type { i32, i32, i32, %struct.trajectory_t, %struct.trajectory_t, i32, i32, [3 x float], [3 x float], [3 x float], [3 x float], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.msg_t = type { i32, i32, i32, i8*, i32, i32, i32, i32 }
-	%struct.netadr_t = type { i32, [4 x i8], [10 x i8], i16 }
-	%struct.netchan_buffer_t = type { %struct.msg_t, [16384 x i8], %struct.netchan_buffer_t* }
-	%struct.netchan_t = type { i32, i32, %struct.netadr_t, i32, i32, i32, i32, i32, [16384 x i8], i32, i32, i32, [16384 x i8] }
-	%struct.playerState_t = type { i32, i32, i32, i32, i32, [3 x float], [3 x float], i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, [3 x float], i32, i32, [2 x i32], [2 x i32], i32, i32, i32, i32, i32, i32, [3 x float], i32, i32, i32, i32, i32, [16 x i32], [16 x i32], [16 x i32], [16 x i32], i32, i32, i32, i32, i32, i32, i32 }
-	%struct.sharedEntity_t = type { %struct.entityState_t, %struct.entityShared_t }
-	%struct.trajectory_t = type { i32, i32, i32, [3 x float], [3 x float] }
-	%struct.usercmd_t = type { i32, [3 x i32], i32, i8, i8, i8, i8 }
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define void @SV_DirectConnect(i64 %from.0.0, i64 %from.0.1, i32 %from.1) {
-entry:
-	%temp = alloca %struct.client_t, align 16		; <%struct.client_t*> [#uses=1]
-	%temp586 = bitcast %struct.client_t* %temp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* null, i8* %temp586, i32 121596, i32 0 )
-	unreachable
-}
diff --git a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
index f1b8b80..cf96c4c 100644
--- a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
@@ -3,21 +3,22 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
-	%struct.LongestMember = type { i8, i32 }
-	%struct.MyString = type { i32 }
-	%struct.UnionType = type { %struct.LongestMember }
+
+%struct.LongestMember = type { i8, i32 }
+%struct.MyString = type { i32 }
+%struct.UnionType = type { %struct.LongestMember }
 
 define void @_Z4testP9UnionTypePS0_(%struct.UnionType* %p, %struct.UnionType** %pointerToUnion) {
 entry:
-	%tmp = alloca %struct.UnionType, align 8		; <%struct.UnionType*> [#uses=2]
-	%tmp2 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp13 = getelementptr %struct.UnionType* %p, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp2, i8* %tmp13, i32 8, i32 0 )
-	%tmp5 = load %struct.UnionType** %pointerToUnion		; <%struct.UnionType*> [#uses=1]
-	%tmp56 = getelementptr %struct.UnionType* %tmp5, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp7 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp56, i8* %tmp7, i32 8, i32 0 )
-	ret void
+  %tmp = alloca %struct.UnionType, align 8
+  %tmp2 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0
+  %tmp13 = getelementptr %struct.UnionType* %p, i32 0, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
+  %tmp5 = load %struct.UnionType** %pointerToUnion
+  %tmp56 = getelementptr %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
+  %tmp7 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
index b704727..71ba601 100644
--- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -4,14 +4,14 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
-define void @memtest1(i8* %dst, i8* %src) nounwind  {
+define void @memtest1(i8* %dst, i8* %src) nounwind {
 entry:
-	%temp = alloca [200 x i8]		; <[100 x i8]*> [#uses=2]
-	%temp1 = bitcast [200 x i8]* %temp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 200, i32 1 )
-	%temp3 = bitcast [200 x i8]* %temp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 200, i32 1 )
-	ret void
+  %temp = alloca [200 x i8]
+  %temp1 = bitcast [200 x i8]* %temp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false)
+  %temp3 = bitcast [200 x i8]* %temp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
index 1df01c1..7cccb19 100644
--- a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@@ -2,21 +2,22 @@
 ; PR2423
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
-	%struct.x = type { [1 x i32], i32, i32 }
+
+%struct.x = type { [1 x i32], i32, i32 }
 
 define i32 @b() nounwind {
 entry:
-	%s = alloca %struct.x		; <%struct.x*> [#uses=2]
-	%r = alloca %struct.x		; <%struct.x*> [#uses=2]
-	call i32 @a( %struct.x* %s ) nounwind		; <i32>:0 [#uses=0]
-	%r1 = bitcast %struct.x* %r to i8*		; <i8*> [#uses=1]
-	%s2 = bitcast %struct.x* %s to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %r1, i8* %s2, i32 12, i32 8 )
-	getelementptr %struct.x* %r, i32 0, i32 0, i32 1		; <i32*>:1 [#uses=1]
-	load i32* %1, align 4		; <i32>:2 [#uses=1]
-	ret i32 %2
+  %s = alloca %struct.x
+  %r = alloca %struct.x
+  %0 = call i32 @a(%struct.x* %s) nounwind
+  %r1 = bitcast %struct.x* %r to i8*
+  %s2 = bitcast %struct.x* %s to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
+  %1 = getelementptr %struct.x* %r, i32 0, i32 0, i32 1
+  %2 = load i32* %1, align 4
+  ret i32 %2
 }
 
 declare i32 @a(%struct.x*)
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
index e32e683..e7a58f1 100644
--- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@@ -18,8 +18,8 @@ entry:
         ; because the type of the first element in %struct.two is i8.
 	%tmpS = getelementptr %struct.two* %S, i32 0, i32 0, i32 0 
 	%tmpD = bitcast %struct.two* %D to i8*
-        call void @llvm.memmove.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false)
         ret void
 }
 
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+\ No newline at end of file
diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
index 526457b..3218d59 100644
--- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -12,9 +12,8 @@ entry:
         %0 = getelementptr %struct.st* %s, i32 0, i32 0  ; <i16*> [#uses=1]
         store i16 1, i16* %0, align 4
         %s1 = bitcast %struct.st* %s to i8*  ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32(i8* %p, i8* %s1, i32 2, i32 1)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false)
         ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
index 31d9bae..1993e4f 100644
--- a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
@@ -13,54 +13,54 @@ define void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
 ; CHECK: @test
 ; CHECK-NOT: alloca
 ; CHECK: "alloca point"
+; CHECK: store <8 x i16>
+; CHECK: store <8 x i16>
+
 entry:
-  %tmp_addr = alloca %struct.int16x8_t            ; <%struct.int16x8_t*> [#uses=3]
-  %dst_addr = alloca %struct.int16x8x2_t*         ; <%struct.int16x8x2_t**> [#uses=2]
-  %__rv = alloca %union..0anon                    ; <%union..0anon*> [#uses=2]
-  %__bx = alloca %struct.int16x8_t                ; <%struct.int16x8_t*> [#uses=2]
-  %__ax = alloca %struct.int16x8_t                ; <%struct.int16x8_t*> [#uses=2]
-  %tmp2 = alloca %struct.int16x8x2_t              ; <%struct.int16x8x2_t*> [#uses=2]
-  %0 = alloca %struct.int16x8x2_t                 ; <%struct.int16x8x2_t*> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+  %tmp_addr = alloca %struct.int16x8_t
+  %dst_addr = alloca %struct.int16x8x2_t*
+  %__rv = alloca %union..0anon
+  %__bx = alloca %struct.int16x8_t
+  %__ax = alloca %struct.int16x8_t
+  %tmp2 = alloca %struct.int16x8x2_t
+  %0 = alloca %struct.int16x8x2_t
+  %"alloca point" = bitcast i32 0 to i32
+  %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
   store <8 x i16> %tmp.0, <8 x i16>* %1
   store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
-  %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %4 = load <8 x i16>* %3, align 16               ; <<8 x i16>> [#uses=1]
+  %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0
+  %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
+  %4 = load <8 x i16>* %3, align 16
   store <8 x i16> %4, <8 x i16>* %2, align 16
-  %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %7 = load <8 x i16>* %6, align 16               ; <<8 x i16>> [#uses=1]
+  %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0
+  %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
+  %7 = load <8 x i16>* %6, align 16
   store <8 x i16> %7, <8 x i16>* %5, align 16
-  %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %9 = load <8 x i16>* %8, align 16               ; <<8 x i16>> [#uses=2]
-  %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %11 = load <8 x i16>* %10, align 16             ; <<8 x i16>> [#uses=2]
-  %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
-  %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2]
-  %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; <<8 x i16>> [#uses=1]
-  %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+  %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0
+  %9 = load <8 x i16>* %8, align 16
+  %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0
+  %11 = load <8 x i16>* %10, align 16
+  %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0
+  %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t*
+  %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0
   store <8 x i16> %14, <8 x i16>* %15
-  %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; <<8 x i16>> [#uses=1]
-  %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1]
+  %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1
   store <8 x i16> %16, <8 x i16>* %17
-  %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
-  %19 = bitcast %struct.int16x8x2_t* %0 to i8*    ; <i8*> [#uses=1]
-  %20 = bitcast %struct.int16x8x2_t* %18 to i8*   ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16)
-  %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
-  %21 = bitcast %struct.int16x8x2_t* %0 to i8*    ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16)
-  %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1]
-  %23 = bitcast %struct.int16x8x2_t* %22 to i8*   ; <i8*> [#uses=1]
-  %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16)
+  %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0
+  %19 = bitcast %struct.int16x8x2_t* %0 to i8*
+  %20 = bitcast %struct.int16x8x2_t* %18 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false)
+  %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
+  %21 = bitcast %struct.int16x8x2_t* %0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
+  %22 = load %struct.int16x8x2_t** %dst_addr, align 4
+  %23 = bitcast %struct.int16x8x2_t* %22 to i8*
+  %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
   br label %return
 
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-
 return:                                           ; preds = %entry
   ret void
 }
@@ -69,21 +69,22 @@ return:                                           ; preds = %entry
 %struct._NSRange = type { i64 }
 
 define void @test_memcpy_self() nounwind {
-; CHECK: @test_memcpy_self
-; CHECK-NOT: alloca
-; CHECK: br i1
 entry:
-  %range = alloca %struct._NSRange                ; <%struct._NSRange*> [#uses=2]
+  %range = alloca %struct._NSRange
   br i1 undef, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %tmp3 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
-  %tmp4 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8)
+  %tmp3 = bitcast %struct._NSRange* %range to i8*
+  %tmp4 = bitcast %struct._NSRange* %range to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false)
   ret void
 
 cond.false:                                       ; preds = %entry
   ret void
+
+; CHECK: @test_memcpy_self
+; CHECK-NOT: alloca
+; CHECK: br i1
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
index 3aee399..52df6d5 100644
--- a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
+++ b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 
-%struct.test = type { [3 x double ] }
+%struct.test = type { [3 x double] }
 
 define void @test_memcpy_self() nounwind {
 ; CHECK: @test_memcpy_self
@@ -11,8 +11,8 @@ define void @test_memcpy_self() nounwind {
 ; CHECK: ret void
   %1 = alloca %struct.test
   %2 = bitcast %struct.test* %1 to i8*
-  call void @llvm.memcpy.i32(i8* %2, i8* %2, i32 24, i32 4)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/badarray.ll b/test/Transforms/ScalarRepl/badarray.ll
index 3ec3c01..768fec6 100644
--- a/test/Transforms/ScalarRepl/badarray.ll
+++ b/test/Transforms/ScalarRepl/badarray.ll
@@ -48,10 +48,10 @@ entry:
   %callret = call %padded *@test3f() ; <i32> [#uses=2]
   %callretcast = bitcast %padded* %callret to i8*                     ; <i8*> [#uses=1]
   %var_11 = bitcast %padded* %var_1 to i8*        ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 declare %padded* @test3f()
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
index 83daaaf..e06d0c1 100644
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -143,7 +143,6 @@ entry:
         %struct.anon = type { %struct.aal_spanarray_t }
 
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 
 define fastcc void @test7() {
 entry:
@@ -158,7 +157,7 @@ cond_next114.i:         ; preds = %cond_true
 
 cond_next:              ; preds = %cond_true
         %SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8*            ; <i8*> [#uses=1]
-        call void @llvm.memcpy.i32( i8* %SB19, i8* null, i32 12, i32 0 )
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i32 0, i1 false)
         br i1 false, label %cond_next34, label %cond_next79
 
 cond_next34:            ; preds = %cond_next
@@ -196,7 +195,7 @@ entry:
         %.compoundliteral = alloca %0           
         %tmp228 = getelementptr %0* %.compoundliteral, i32 0, i32 7
         %tmp229 = bitcast [0 x i16]* %tmp228 to i8*             
-        call void @llvm.memset.i64(i8* %tmp229, i8 0, i64 0, i32 2)
+        call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i32 2, i1 false)
         unreachable
 }
 
@@ -260,3 +259,6 @@ entry:
   call void %0() nounwind
   ret void
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll
index 5aeefcd..42e7a0f 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -14,31 +14,29 @@ entry:
 	%L = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=2]
 	%L2 = bitcast %struct.foo* %L to i8*		; <i8*> [#uses=1]
 	%tmp13 = bitcast %struct.foo* %P to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %L2, i8* %tmp13, i32 8, i32 4 )
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false)
 	%tmp4 = getelementptr %struct.foo* %L, i32 0, i32 0		; <i32*> [#uses=1]
 	%tmp5 = load i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
 
 define i32 @test2() {
 entry:
 	%L = alloca [4 x %struct.foo], align 16		; <[4 x %struct.foo]*> [#uses=2]
 	%L12 = bitcast [4 x %struct.foo]* %L to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %L12, i8 0, i32 32, i32 16 )
+        call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false)
 	%tmp4 = getelementptr [4 x %struct.foo]* %L, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
 	%tmp5 = load i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
 }
 
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
 
 define i32 @test3() {
 entry:
 	%B = alloca %struct.bar, align 16		; <%struct.bar*> [#uses=4]
 	%B1 = bitcast %struct.bar* %B to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32( i8* %B1, i8 1, i32 24, i32 16 )
+	call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i32 16, i1 false)
 	%tmp3 = getelementptr %struct.bar* %B, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 1, i32* %tmp3
 	%tmp4 = getelementptr %struct.bar* %B, i32 0, i32 2		; <double*> [#uses=1]
@@ -58,9 +56,12 @@ entry:
 	store i32 1, i32* %0, align 8
 	%1 = getelementptr %struct.f* %A, i32 0, i32 1		; <i32*> [#uses=1]
 	%2 = bitcast i32* %1 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.i32(i8* %2, i8 2, i32 12, i32 4)
+	call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false)
 	%3 = getelementptr %struct.f* %A, i32 0, i32 2		; <i32*> [#uses=1]
 	%4 = load i32* %3, align 8		; <i32> [#uses=1]
 	%retval12 = trunc i32 %4 to i16		; <i16> [#uses=1]
 	ret i16 %retval12
 }
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+\ No newline at end of file
diff --git a/test/Transforms/SimplifyLibCalls/MemCpy.ll b/test/Transforms/SimplifyLibCalls/MemCpy.ll
index 39662b1..c711178 100644
--- a/test/Transforms/SimplifyLibCalls/MemCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/MemCpy.ll
@@ -4,17 +4,16 @@
 @hel = constant [4 x i8] c"hel\00"		; <[4 x i8]*> [#uses=1]
 @hello_u = constant [8 x i8] c"hello_u\00"		; <[8 x i8]*> [#uses=1]
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
 define i32 @main() {
-	%h_p = getelementptr [2 x i8]* @h, i32 0, i32 0		; <i8*> [#uses=1]
-	%hel_p = getelementptr [4 x i8]* @hel, i32 0, i32 0		; <i8*> [#uses=1]
-	%hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0		; <i8*> [#uses=1]
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=3]
-	call void @llvm.memcpy.i32( i8* %target_p, i8* %h_p, i32 2, i32 2 )
-	call void @llvm.memcpy.i32( i8* %target_p, i8* %hel_p, i32 4, i32 4 )
-	call void @llvm.memcpy.i32( i8* %target_p, i8* %hello_u_p, i32 8, i32 8 )
-	ret i32 0
+  %h_p = getelementptr [2 x i8]* @h, i32 0, i32 0
+  %hel_p = getelementptr [4 x i8]* @hel, i32 0, i32 0
+  %hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0
+  %target = alloca [1024 x i8]
+  %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
+  ret i32 0
 }
 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind