diff options
-rw-r--r-- | include/llvm/CodeGen/SelectionDAGNodes.h | 4
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 4
-rw-r--r-- | test/CodeGen/PowerPC/vec_splat_constant.ll | 24
4 files changed, 35 insertions, 8 deletions
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index f960851..d4d40b1 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1953,10 +1953,10 @@ public: /// that value are zero, and the corresponding bits in the SplatUndef mask /// are set. The SplatBitSize value is set to the splat element size in /// bits. HasAnyUndefs is set to true if any bits in the vector are - /// undefined. + /// undefined. isBigEndian describes the endianness of the target. bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, - unsigned MinSplatBits = 0); + unsigned MinSplatBits = 0, bool isBigEndian = false); static inline bool classof(const BuildVectorSDNode *) { return true; } static inline bool classof(const SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d9c273d..4530ffc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5916,7 +5916,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, - unsigned MinSplatBits) { + unsigned MinSplatBits, + bool isBigEndian) { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); @@ -5933,12 +5934,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned int nOps = getNumOperands(); assert(nOps > 0 && "isConstantSplat has 0-size build vector"); unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); - for (unsigned i = 0; i < nOps; ++i) { + + for (unsigned j = 0; j < nOps; ++j) { + unsigned i = isBigEndian ? 
nOps-1-j : j; SDValue OpVal = getOperand(i); - unsigned BitPos = i * EltBitSize; + unsigned BitPos = j * EltBitSize; if (OpVal.getOpcode() == ISD::UNDEF) - SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize); + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). zextOrTrunc(sz) << BitPos); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3346246..099fcb5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -637,7 +637,7 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) { unsigned BitSize; bool HasAnyUndefs; - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32)) + if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) return CFP->getValueAPF().isNegZero(); @@ -3672,7 +3672,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { unsigned SplatBitSize; bool HasAnyUndefs; if (! 
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs) || SplatBitSize > 32) + HasAnyUndefs, 0, true) || SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); diff --git a/test/CodeGen/PowerPC/vec_splat_constant.ll b/test/CodeGen/PowerPC/vec_splat_constant.ll new file mode 100644 index 0000000..b227794 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_splat_constant.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s +; Formerly incorrectly inserted vsldoi (endian confusion) + +@baz = common global <16 x i8> zeroinitializer ; <<16 x i8>*> [#uses=1] + +define void @foo(<16 x i8> %x) nounwind ssp { +entry: +; CHECK: _foo: +; CHECK-NOT: vsldoi + %x_addr = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %temp = alloca <16 x i8> ; <<16 x i8>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store <16 x i8> %x, <16 x i8>* %x_addr + store <16 x i8> <i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14>, <16 x i8>* %temp, align 16 + %0 = load <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1] + %1 = load <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1] + %tmp = add <16 x i8> %0, %1 ; <<16 x i8>> [#uses=1] + store <16 x i8> %tmp, <16 x i8>* @baz, align 16 + br label %return + +return: ; preds = %entry + ret void +; CHECK: blr +} |