diff options
author | Eli Friedman <eli.friedman@gmail.com> | 2011-12-26 22:49:32 +0000 |
---|---|---|
committer | Eli Friedman <eli.friedman@gmail.com> | 2011-12-26 22:49:32 +0000 |
commit | d6e2560e7aea0e6c6bca950966252552f1caec63 (patch) | |
tree | ec87c137c9ee4417b7e3b351fe466d2bc4d20f5a | |
parent | c9a1aed7fefe627820bb92154361ede0568229fc (diff) | |
download | external_llvm-d6e2560e7aea0e6c6bca950966252552f1caec63.zip external_llvm-d6e2560e7aea0e6c6bca950966252552f1caec63.tar.gz external_llvm-d6e2560e7aea0e6c6bca950966252552f1caec63.tar.bz2 |
Make sure DAGCombiner doesn't introduce multiple loads from the same memory location. PR10747, part 2.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147283 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 | ||||
-rw-r--r-- | test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll | 16 |
3 files changed, 44 insertions, 6 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 80cf0a8..8166185 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6905,6 +6905,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT LVT = ExtVT; if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6922,12 +6926,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) @@ -6938,14 +6950,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BITCAST) + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + InVec = InVec.getOperand(0); + } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } + // Make sure we found a non-volatile load and the extractelement is + // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); @@ -6982,6 +7001,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index 86e8712..6fbae19 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind { ; CHECK: movw r1, :lower16:{{.*}} ; CHECK: movt r1, :upper16:{{.*}} -; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]} -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]] +; CHECK: vldmia r1 +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vstmia {{.*}} L.entry: diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll new file mode 100644 index 0000000..39c213f --- /dev/null +++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s +; Make sure we don't load from the location pointed to by %p +; twice: it has non-obvious performance implications, and +; the relevant transformation doesn't know how to update +; the chains correctly. +; PR10747 + +; CHECK: test: +; CHECK: pextrd $2, %xmm +define <4 x i32> @test(<4 x i32>* %p) { + %v = load <4 x i32>* %p + %e = extractelement <4 x i32> %v, i32 2 + %cmp = icmp eq i32 %e, 3 + %sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer + ret <4 x i32> %sel +} |