author     Chris Lattner <sabre@nondot.org>    2008-01-14 00:28:35 +0000
committer  Chris Lattner <sabre@nondot.org>    2008-01-14 00:28:35 +0000
commit     c669fb6b8281d6a82151a0d7471166fb9a71ed4d (patch)
tree       7229bf42341ba987be4848f5ffeeaa9076ae727f /lib/Transforms
parent     00ae513bc4009f0d8b2e813d8168e6f279bed44b (diff)
Turn a memcpy from a double* into a load/store of double instead of a
load/store of i64. The latter prevents promotion/scalarrepl of the source
and dest in many cases. This fixes the 300% performance regression of the
byval stuff on stepanov_v1p2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45945 91177308-0d34-0410-b5e6-96231b3b80d8
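As an illustration (this example is not part of the commit; the value names
and the LLVM 2.x-era IR syntax are mine), consider an 8-byte copy between
two double pointers:

  %src8 = bitcast double* %src to i8*
  %dst8 = bitcast double* %dst to i8*
  call void @llvm.memcpy.i32(i8* %dst8, i8* %src8, i32 8, i32 8)

Before this patch, instcombine lowered the copy through the integer type of
matching width:

  %s = bitcast double* %src to i64*
  %d = bitcast double* %dst to i64*
  %tmp = load i64* %s
  store i64 %tmp, i64* %d

After this patch, the bitcast feeding the memcpy is inspected and the
pointee type double is used instead, so the temporary stays a plain double
value and the source/dest allocas remain candidates for mem2reg/scalarrepl:

  %tmp = load double* %src
  store double %tmp, double* %dst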
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp  55
1 file changed, 46 insertions, 9 deletions
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index bdd3e51..63919a8 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -7826,16 +7826,49 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
   if (MemOpLength == 0) return 0;
 
-  // Source and destination pointer types are always "i8*" for intrinsic.
-  //   If Size is 8 then use Int64Ty
-  //   If Size is 4 then use Int32Ty
-  //   If Size is 2 then use Int16Ty
-  //   If Size is 1 then use Int8Ty
+  // Source and destination pointer types are always "i8*" for the intrinsic.
+  // See if the size is something we can handle with a single primitive
+  // load/store. A single load+store correctly handles overlapping memory in
+  // the memmove case.
   unsigned Size = MemOpLength->getZExtValue();
   if (Size == 0 || Size > 8 || (Size&(Size-1)))
-    return 0;  // If not 1/2/4/8, exit.
+    return 0;  // If not 1/2/4/8 bytes, exit.
 
+  // Use an integer load+store unless we can find something better.
   Type *NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3));
+
+  // Memcpy forces the use of i8* for the source and destination. That means
+  // that if you're using memcpy to move one double around, you'll get a cast
+  // from double* to i8*. We'd much rather use a double load+store than an i64
+  // load+store here, because that improves the odds that the source or dest
+  // address will be promotable. See if we can find a better type than the
+  // integer datatype.
+  if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
+    const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
+    if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+      // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+      // down through these levels if so.
+      while (!SrcETy->isFirstClassType()) {
+        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+          if (STy->getNumElements() == 1)
+            SrcETy = STy->getElementType(0);
+          else
+            break;
+        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+          if (ATy->getNumElements() == 1)
+            SrcETy = ATy->getElementType();
+          else
+            break;
+        } else
+          break;
+      }
+
+      if (SrcETy->isFirstClassType())
+        NewPtrTy = PointerType::getUnqual(SrcETy);
+    }
+  }
+
+
   // If the memcpy/memmove provides better alignment info than we can
   // infer, use it.
   SrcAlign = std::max(SrcAlign, CopyAlign);
@@ -7843,9 +7876,13 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
 
   Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI);
   Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI);
-  Value *L = new LoadInst(Src, "tmp", false, SrcAlign, MI);
-  new StoreInst(L, Dest, false, DstAlign, MI);
-  return EraseInstFromFunction(*MI);
+  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
+  InsertNewInstBefore(L, *MI);
+  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
+
+  // Set the size of the copy to 0; it will be deleted on the next iteration.
+  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+  return MI;
 }
 
 /// visitCallInst - CallInst simplification. This mostly only handles folding
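A note on the tail of the second hunk: instead of erasing the intrinsic
directly, the patch zeroes its length operand and returns MI. Using the same
hypothetical values as the sketch above, the call left behind is

  call void @llvm.memcpy.i32(i8* %dst8, i8* %src8, i32 0, i32 8)

which, per the added comment, is deleted as a dead zero-length copy on the
next instcombine iteration. The single-element wrapper stripping in the first
hunk serves the same goal: a source typed {{double}}* or [1 x double]* is
peeled down to double so a first-class load/store type can still be found.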