diff options
author | Evan Cheng <evan.cheng@apple.com> | 2010-04-08 07:37:57 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2010-04-08 07:37:57 +0000 |
commit | c3b0c341e731b27b550ee9dcded9c17232b296b8 (patch) | |
tree | f3b7545cc43d3f374b20ac331424afc413d83d70 | |
parent | 8ef5caa80a5f40ba34d02f32f6ba4b4601268f89 (diff) | |
download | external_llvm-c3b0c341e731b27b550ee9dcded9c17232b296b8.zip external_llvm-c3b0c341e731b27b550ee9dcded9c17232b296b8.tar.gz external_llvm-c3b0c341e731b27b550ee9dcded9c17232b296b8.tar.bz2 |
Avoid using f64 to lower memcpy from constant string. It's cheaper to use i32 store of immediates.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100751 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/Target/TargetLowering.h | 8 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 7 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 12 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 29 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 12 | ||||
-rw-r--r-- | test/CodeGen/X86/memcpy-2.ll | 63 | ||||
-rw-r--r-- | test/CodeGen/X86/unaligned-load.ll | 4 |
8 files changed, 109 insertions, 46 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 1a94b44..91d7dfa 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -638,11 +638,13 @@ public: /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if SelectionDAG should be responsible for + /// determining the type. virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool NonScalarIntSafe, bool MemcpyStrSrc, SelectionDAG &DAG) const { return MVT::Other; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index afb429e..81fc48c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3210,6 +3210,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, + bool MemcpyStrSrc, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -3218,9 +3219,11 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // the value, i.e. memset or memcpy from constant string. Otherwise, it's // the inferred alignment of the source. 'DstAlign', on the other hand, is the // specified alignment of the memory operation. If it is zero, that means - // it's possible to change the alignment of the destination. + // it's possible to change the alignment of the destination. 'MemcpyStrSrc' + // indicates whether the memcpy source is constant so it does not need to be + // loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - NonScalarIntSafe, DAG); + NonScalarIntSafe, MemcpyStrSrc, DAG); if (VT == MVT::Other) { if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() || @@ -3286,9 +3289,6 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemcpy(); bool DstAlignCanChange = false; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); @@ -3300,9 +3300,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); + uint64_t Limit = -1ULL; + if (!AlwaysInline) + Limit = TLI.getMaxStoresPerMemcpy(); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - (isZeroStr ? 0 : SrcAlign), true, DAG, TLI)) + (isZeroStr ? 0 : SrcAlign), + true, CopyFromStr, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3390,7 +3394,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - SrcAlign, true, DAG, TLI)) + SrcAlign, true, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3462,7 +3466,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), Size, (DstAlignCanChange ? 0 : Align), 0, - NonScalarIntSafe, DAG, TLI)) + NonScalarIntSafe, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9cd01be..992cc5a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5547,11 +5547,14 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. It returns EVT::Other if SelectionDAG should be responsible -/// for determining it. +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if SelectionDAG should be responsible for +/// determining the type. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, + bool MemcpyStrSrc, SelectionDAG &DAG) const { if (this->PPCSubTarget.isPPC64()) { return MVT::i64; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index f816bdd..f7d629b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -355,12 +355,14 @@ namespace llvm { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if SelectionDAG should be responsible for + /// determining the type. virtual EVT - getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, SelectionDAG &DAG) const; + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + SelectionDAG &DAG) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 678f636..9362d37 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1067,18 +1067,22 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { } /// getOptimalMemOpType - Returns the target specific optimal type for load -/// and store operations as a result of memset, memcpy, and memmove lowering. -/// If DstAlign is zero that means it's safe to destination alignment can -/// satisfy any constraint. Similarly if SrcAlign is zero it means there -/// isn't a need to check it against alignment requirement, probably because -/// the source does not need to be loaded. If 'NonScalarIntSafe' is true, that -/// means it's safe to return a non-scalar-integer type, e.g. constant string -/// source or loaded from memory. It returns EVT::Other if SelectionDAG should -/// be responsible for determining it. +/// and store operations as a result of memset, memcpy, and memmove +/// lowering. If DstAlign is zero that means it's safe to destination +/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it +/// means there isn't a need to check it against alignment requirement, +/// probably because the source does not need to be loaded. If +/// 'NonScalarIntSafe' is true, that means it's safe to return a +/// non-scalar-integer type, e.g. empty string source, constant, or loaded +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if SelectionDAG should be responsible for +/// determining the type. EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, + bool MemcpyStrSrc, SelectionDAG &DAG) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain @@ -1095,11 +1099,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::v4i32; if (Subtarget->hasSSE1()) return MVT::v4f32; - } else if (Size >= 8 && + } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && Subtarget->getStackAlignment() >= 8 && - Subtarget->hasSSE2()) + Subtarget->hasSSE2()) { + // Do not use f64 to lower memcpy if source is string constant. It's + // better to use i32 to avoid the loads. return MVT::f64; + } } if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; @@ -6721,7 +6728,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, + X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1026480..d978676 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -424,12 +424,14 @@ namespace llvm { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if SelectionDAG should be responsible for + /// determining the type. virtual EVT - getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, SelectionDAG &DAG) const; + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + SelectionDAG &DAG) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index 4fe32d9..17cd8e8 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -3,20 +3,20 @@ ; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64 - %struct.ParmT = type { [25 x i8], i8, i8* } -@.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" ; <[25 x i8]*> [#uses=1] +@.str = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" +@.str2 = internal constant [30 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 4 define void @t1(i32 %argc, i8** %argv) nounwind { entry: ; SSE2: t1: -; SSE2: movaps _.str12, %xmm0 +; SSE2: movaps _.str, %xmm0 ; SSE2: movaps %xmm0 ; SSE2: movb $0 ; SSE2: movl $0 ; SSE2: movl $0 ; SSE1: t1: -; SSE1: movaps _.str12, %xmm0 +; SSE1: movaps _.str, %xmm0 ; SSE1: movaps %xmm0 ; SSE1: movb $0 ; SSE1: movl $0 @@ -32,14 +32,14 @@ entry: ; NOSSE: movl $1734438249 ; X86-64: t1: -; X86-64: movaps _.str12(%rip), %xmm0 +; X86-64: movaps _.str(%rip), %xmm0 ; X86-64: movaps %xmm0 ; X86-64: movb $0 ; X86-64: movq $0 - %parms.i = alloca [13 x %struct.ParmT] ; <[13 x %struct.ParmT]*> [#uses=1] - %parms1.i = getelementptr [13 x %struct.ParmT]* %parms.i, i32 0, i32 0, i32 0, i32 0 ; <i8*> [#uses=1] - call void @llvm.memcpy.i32( i8* %parms1.i, i8* getelementptr ([25 x i8]* @.str12, i32 0, i32 0), i32 25, i32 1 ) nounwind - unreachable + %tmp1 = alloca [25 x i8] + %tmp2 = bitcast [25 x i8]* %tmp1 to i8* + call void @llvm.memcpy.i32( i8* %tmp2, i8* getelementptr ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1 ) nounwind + unreachable } ;rdar://7774704 @@ -119,4 +119,49 @@ entry: ret void } +define void @t4() nounwind { +entry: +; SSE2: t4: +; SSE2: movw $120 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 +; SSE2: movl $2021161080 + +; SSE1: t4: +; SSE1: movw $120 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 +; SSE1: movl $2021161080 + +; NOSSE: t4: +; NOSSE: movw $120 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080 + +; X86-64: t4: +; X86-64: movabsq $8680820740569200760, %rax +; X86-64: movq %rax +; X86-64: movq %rax +; X86-64: movq %rax +; X86-64: movw $120 +; X86-64: movl $2021161080 + %tmp1 = alloca [30 x i8] + %tmp2 = bitcast [30 x i8]* %tmp1 to i8* + call void @llvm.memcpy.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1) + unreachable +} + declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll index 47b7896..e210531 100644 --- a/test/CodeGen/X86/unaligned-load.ll +++ b/test/CodeGen/X86/unaligned-load.ll @@ -13,9 +13,7 @@ entry: bb: %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0 call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1) -; I386: movsd _.str3+16 -; I386: movsd _.str3+8 -; I386: movsd _.str3 +; I386: call {{_?}}memcpy ; CORE2: movabsq ; CORE2: movabsq |