diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-01-19 08:38:41 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-01-19 08:38:41 +0000 |
commit | ba9586544164e69754039a25cb0ef7907d27382d (patch) | |
tree | 98ebe27964ae162fa9c5dd28f27e7bfbdb4182ad /lib/Target/X86 | |
parent | cfcab21e4d0e4d7444b147898d6aed1348df3043 (diff) | |
download | external_llvm-ba9586544164e69754039a25cb0ef7907d27382d.zip external_llvm-ba9586544164e69754039a25cb0ef7907d27382d.tar.gz external_llvm-ba9586544164e69754039a25cb0ef7907d27382d.tar.bz2 |
On Sandy Bridge, split unaligned 256-bit stores into two XMM-sized (128-bit) stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172894 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 27 |
1 files changed, 18 insertions, 9 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b6b10e2..ca8cd74 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16344,12 +16344,15 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::LoadExtType Ext = Ld->getExtensionType(); unsigned Alignment = Ld->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8; // On Sandybridge unaligned 256bit loads are inefficient. if (RegVT.is256BitVector() && !Subtarget->hasInt256() && - !DCI.isBeforeLegalizeOps() && Alignment < 32 && - Ext == ISD::NON_EXTLOAD) { + !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) { unsigned NumElems = RegVT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + SDValue Ptr = Ld->getBasePtr(); SDValue Increment = DAG.getConstant(16, TLI.getPointerTy()); @@ -16363,7 +16366,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), - Alignment); + std::max(Alignment/2U, 1U)); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); @@ -16536,16 +16539,21 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = St->getDebugLoc(); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned Alignment = St->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8; // If we are saving a concatenation of two XMM registers, perform two stores. // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. However, on Haswell it is better to issue a single 256-bit // memory operation. 
if (VT.is256BitVector() && !Subtarget->hasInt256() && - StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && - StoredVal.getNumOperands() == 2) { - SDValue Value0 = StoredVal.getOperand(0); - SDValue Value1 = StoredVal.getOperand(1); + StVT == VT && !IsAligned) { + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + + SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl); + SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl); SDValue Stride = DAG.getConstant(16, TLI.getPointerTy()); SDValue Ptr0 = St->getBasePtr(); @@ -16553,10 +16561,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->isNonTemporal(), Alignment); SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->isNonTemporal(), + std::max(Alignment/2U, 1U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } |