about | summary | refs | log | tree | commit | diff | stats
path: root/lib/Target
diff options
context:
space:
mode:
author: Dale Johannesen <dalej@apple.com> 2008-02-25 19:20:14 +0000
committer: Dale Johannesen <dalej@apple.com> 2008-02-25 19:20:14 +0000
commit: 079f2a66ae0478a1153306d7afb1b361e56b4a02 (patch)
tree: eb2ce2918277b4c7b297406b4f80bf7c3b3b59da /lib/Target
parent: 4fc3d5dac255120e2f0c0b537044fcf56a30fa34 (diff)
download: external_llvm-079f2a66ae0478a1153306d7afb1b361e56b4a02.zip
external_llvm-079f2a66ae0478a1153306d7afb1b361e56b4a02.tar.gz
external_llvm-079f2a66ae0478a1153306d7afb1b361e56b4a02.tar.bz2
Expand removal of MMX memory copies to allow 1 level
of TokenFactor underneath chain (seems to be enough).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47554 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 114
1 files changed, 75 insertions, 39 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3e890a5..31f80ec 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5878,50 +5878,86 @@ static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
+ // A preferable solution to the general problem is to figure out the right
+ // places to insert EMMS. This qualifies as a quick hack.
if (MVT::isVector(St->getValue().getValueType()) &&
MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
- // Must be a store of a load.
- isa<LoadSDNode>(St->getChain()) &&
- St->getChain().Val == St->getValue().Val &&
- St->getValue().hasOneUse() && St->getChain().hasOneUse() &&
- !St->isVolatile() && !cast<LoadSDNode>(St->getChain())->isVolatile()) {
- LoadSDNode *Ld = cast<LoadSDNode>(St->getChain());
-
- // If we are a 64-bit capable x86, lower to a single movq load/store pair.
- if (Subtarget->is64Bit()) {
- SDOperand NewLd = DAG.getLoad(MVT::i64, Ld->getChain(), Ld->getBasePtr(),
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
- Ld->isVolatile(), Ld->getAlignment());
- return DAG.getStore(NewLd.getValue(1), NewLd, St->getBasePtr(),
+ isa<LoadSDNode>(St->getValue()) &&
+ !cast<LoadSDNode>(St->getValue())->isVolatile() &&
+ St->getChain().hasOneUse() && !St->isVolatile()) {
+ LoadSDNode *Ld = 0;
+ int TokenFactorIndex = -1;
+ SmallVector<SDOperand, 8> Ops;
+ SDNode* ChainVal = St->getChain().Val;
+ // Must be a store of a load. We currently handle two cases: the load
+ // is a direct child, and it's under an intervening TokenFactor. It is
+ // possible to dig deeper under nested TokenFactors.
+ if (ChainVal == St->getValue().Val)
+ Ld = cast<LoadSDNode>(St->getChain());
+ else if (St->getValue().hasOneUse() &&
+ ChainVal->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ if (ChainVal->getOperand(i).Val == St->getValue().Val) {
+ if (TokenFactorIndex != -1)
+ return SDOperand();
+ TokenFactorIndex = i;
+ Ld = cast<LoadSDNode>(St->getValue());
+ } else
+ Ops.push_back(ChainVal->getOperand(i));
+ }
+ }
+ if (Ld) {
+ // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+ if (Subtarget->is64Bit()) {
+ SDOperand NewLd = DAG.getLoad(MVT::i64, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getSrcValue(),
+ Ld->getSrcValueOffset(), Ld->isVolatile(),
+ Ld->getAlignment());
+ SDOperand NewChain = NewLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(NewLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+ return DAG.getStore(NewChain, NewLd, St->getBasePtr(),
+ St->getSrcValue(), St->getSrcValueOffset(),
+ St->isVolatile(), St->getAlignment());
+ }
+
+ // Otherwise, lower to two 32-bit copies.
+ SDOperand LoAddr = Ld->getBasePtr();
+ SDOperand HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
+ DAG.getConstant(MVT::i32, 4));
+
+ SDOperand LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->getAlignment());
+ SDOperand HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+ Ld->isVolatile(),
+ MinAlign(Ld->getAlignment(), 4));
+
+ SDOperand NewChain = LoLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(LoLd);
+ Ops.push_back(HiLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+
+ LoAddr = St->getBasePtr();
+ HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
+ DAG.getConstant(MVT::i32, 4));
+
+ SDOperand LoSt = DAG.getStore(NewChain, LoLd, LoAddr,
St->getSrcValue(), St->getSrcValueOffset(),
St->isVolatile(), St->getAlignment());
+ SDOperand HiSt = DAG.getStore(NewChain, HiLd, HiAddr,
+ St->getSrcValue(), St->getSrcValueOffset()+4,
+ St->isVolatile(),
+ MinAlign(St->getAlignment(), 4));
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
}
-
- // Otherwise, lower to two 32-bit copies.
- SDOperand LoAddr = Ld->getBasePtr();
- SDOperand HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
- DAG.getConstant(MVT::i32, 4));
-
- SDOperand LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
- Ld->isVolatile(), Ld->getAlignment());
- SDOperand HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
- Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
- Ld->isVolatile(),
- MinAlign(Ld->getAlignment(), 4));
-
- LoAddr = St->getBasePtr();
- HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
- DAG.getConstant(MVT::i32, 4));
-
- SDOperand LoSt = DAG.getStore(LoLd.getValue(1), LoLd, LoAddr,
- St->getSrcValue(), St->getSrcValueOffset(),
- St->isVolatile(), St->getAlignment());
- SDOperand HiSt = DAG.getStore(HiLd.getValue(1), HiLd, HiAddr,
- St->getSrcValue(), St->getSrcValueOffset()+4,
- St->isVolatile(),
- MinAlign(St->getAlignment(), 4));
- return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
}
return SDOperand();
}