-rw-r--r--   lib/CodeGen/SelectionDAG/SelectionDAG.cpp   78
-rw-r--r--   lib/Target/X86/X86ISelLowering.cpp            2
-rw-r--r--   test/CodeGen/X86/memmove-4.ll                12
3 files changed, 86 insertions, 6 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 24954d7..2d2ae06 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2695,8 +2695,8 @@ static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
const Value *SrcSV, uint64_t SrcSVOff){
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Expand memcpy to a series of store ops if the size operand falls below
- // a certain threshold.
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
std::vector<MVT::ValueType> MemOps;
uint64_t Limit = -1;
if (!AlwaysInline)
@@ -2743,6 +2743,63 @@ static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
&OutChains[0], OutChains.size());
}
+static SDOperand getMemmoveLoadsAndStores(SelectionDAG &DAG,
+ SDOperand Chain, SDOperand Dst,
+ SDOperand Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ std::vector<MVT::ValueType> MemOps;
+ uint64_t Limit = -1;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemmove();
+ unsigned DstAlign = Align; // May be raised by MeetsMaxMemopRequirement.
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ DAG, TLI))
+ return SDOperand();
+
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ SmallVector<SDOperand, 8> LoadValues;
+ SmallVector<SDOperand, 8> LoadChains;
+ SmallVector<SDOperand, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
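+ // Load everything from the source up front; the destination may overlap it.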
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Value;
+
+ Value = DAG.getLoad(VT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcSVOff + SrcOff, false, Align);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &LoadChains[0], LoadChains.size());
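+ // Every load is issued and chained; it is now safe to emit the stores.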
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Store;
+
+ Store = DAG.getStore(Chain, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
static SDOperand getMemsetStores(SelectionDAG &DAG,
SDOperand Chain, SDOperand Dst,
SDOperand Src, uint64_t Size,
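
Why the expansion above issues every load before any store: memmove must behave as if the source were first copied to a scratch buffer, because the two ranges may overlap, and a store interleaved between loads could clobber source bytes that have not been read yet. A minimal standalone sketch of the same load-all-then-store-all shape (plain C++, not part of the patch; move12 and the 12-byte dword layout are illustrative):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Hypothetical scalarized form of a 12-byte memmove at dword granularity,
// mirroring the node order built above: three loads, a token barrier,
// then three stores.
static void move12(uint8_t *Dst, const uint8_t *Src) {
  uint32_t V0, V1, V2;
  std::memcpy(&V0, Src + 0, 4);  // all loads first; an early store could
  std::memcpy(&V1, Src + 4, 4);  // overwrite source bytes not yet read
  std::memcpy(&V2, Src + 8, 4);
  std::memcpy(Dst + 0, &V0, 4);  // stores only after every load is done
  std::memcpy(Dst + 4, &V1, 4);
  std::memcpy(Dst + 8, &V2, 4);
}

int main() {
  uint8_t Buf[16];
  for (unsigned i = 0; i != 16; ++i) Buf[i] = uint8_t(i);
  move12(Buf + 2, Buf);  // overlapping ranges, as memmove allows
  for (unsigned i = 0; i != 16; ++i) std::printf("%d ", Buf[i]);
  std::printf("\n");     // prints 0 1 0 1 2 3 4 5 6 7 8 9 10 11 14 15
  return 0;
}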
@@ -2836,9 +2893,20 @@ SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst,
const Value *DstSV, uint64_t DstSVOff,
const Value *SrcSV, uint64_t SrcSVOff) {
- // TODO: Optimize small memmove cases with simple loads and stores,
- // ensuring that all loads precede all stores. This can cause severe
- // register pressure, so targets should be careful with the size limit.
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDOperand Result =
+ getMemmoveLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.Val)
+ return Result;
+ }
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
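
The new dispatch in getMemmove tries lowerings in order: a constant zero size is a no-op, a small constant size is expanded inline, and everything else falls through to target-specific lowering or, ultimately, a libcall. A runnable model of just that ordering (deliberately simplified: the real code sizes the operations via MeetsMaxMemopRequirement over target value types; classify, OpSize, and MaxStores are illustrative names, not LLVM API):

#include <cstdint>
#include <cstdio>

enum class Lowering { NoOp, InlineLoadsStores, TargetOrLibcall };

// Mirrors the order of checks in SelectionDAG::getMemmove: only a
// constant size can be expanded inline, and only when it fits within
// maxStoresPerMemmove operations of the chosen width.
static Lowering classify(const uint64_t *ConstSize,
                         unsigned OpSize = 4,      // dword ops, as on x86
                         unsigned MaxStores = 3) { // new x86 limit
  if (ConstSize) {
    if (*ConstSize == 0)
      return Lowering::NoOp;
    if (*ConstSize <= uint64_t(MaxStores) * OpSize)
      return Lowering::InlineLoadsStores;
  }
  return Lowering::TargetOrLibcall;
}

int main() {
  const uint64_t Zero = 0, Small = 12, Large = 4096;
  std::printf("%d %d %d %d\n",
              static_cast<int>(classify(&Zero)),    // 0: no-op
              static_cast<int>(classify(&Small)),   // 1: inlined
              static_cast<int>(classify(&Large)),   // 2: target/libcall
              static_cast<int>(classify(nullptr))); // 2: size not constant
  return 0;
}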
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d194d38..c4307b8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -737,7 +737,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
- maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
+ maxStoresPerMemmove = 3; // For %llvm.memmove -> sequence of stores
allowUnalignedMemoryAccesses = true; // x86 supports it!
setPrefLoopAlignment(16);
}
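
The drop from 16 to 3 follows from the register-pressure warning in the TODO removed above: since every load must be issued before the first store, all loaded values are live at the same time, so this limit effectively bounds the number of simultaneously live registers. At dword width that caps inline expansion at 12-byte memmoves; a limit of 16 could keep sixteen values live at once, which ia32, with only eight general-purpose registers, would have to spill.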
diff --git a/test/CodeGen/X86/memmove-4.ll b/test/CodeGen/X86/memmove-4.ll
new file mode 100644
index 0000000..f23c7d5
--- /dev/null
+++ b/test/CodeGen/X86/memmove-4.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc | not grep call
+
+target triple = "i686-pc-linux-gnu"
+
+define void @a(i8* %a, i8* %b) nounwind {
+ %tmp2 = bitcast i8* %a to i8*
+ %tmp3 = bitcast i8* %b to i8*
+ tail call void @llvm.memmove.i32( i8* %tmp2, i8* %tmp3, i32 12, i32 4 )
+ ret void
+}
+
+declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
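
The test probes the new limit directly: a 12-byte memmove with 4-byte alignment is expected to expand to three dword load/store pairs, exactly maxStoresPerMemmove, so llc should emit no libcall and the `not grep call` RUN line passes.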