author     Scott Michel <scottm@aero.org>    2008-12-04 21:01:44 +0000
committer  Scott Michel <scottm@aero.org>    2008-12-04 21:01:44 +0000
commit     e2641a1721cd4cb21b3d247c986159a266f8b8ca (patch)
tree       e22bd5a13cb529b7f4fed5e5219a461399ff2d34 /lib/Target/CellSPU
parent     760997765ebcc690c5db3e0f0258f7ff19855b1d (diff)
CellSPU: Fix bug 3055
- Add v4f32, v2f64 to LowerVECTOR_SHUFFLE
- Look for a vector rotate in the shuffle elements and generate a vector rotate
  instead of a full-blown shuffle when the opportunity presents itself.
- Generate a larger test harness and fix a few interesting but obscure bugs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60552 91177308-0d34-0410-b5e6-96231b3b80d8
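The second bullet is the interesting one: when every defined element of a shuffle mask reads consecutive elements of the first operand with wrap-around (for example <1, 2, 3, 0> on a v4f32), the shuffle is just a quadword byte rotation, so the patch emits a single SPUISD::ROTBYTES_LEFT (ROTQBY/ROTQBYI) instead of a full SHUFB with a synthesized control mask. A minimal standalone sketch of that detection, using a hypothetical matchVectorRotate helper over a plain std::vector<int> mask rather than the patch's PermMask build-vector:

```cpp
#include <optional>
#include <vector>

// Sketch only: returns the left-rotate byte amount if Mask reads consecutive
// elements of the first input vector with wrap-around, or std::nullopt if the
// shuffle is not a rotation (undef elements are omitted for brevity).
std::optional<unsigned> matchVectorRotate(const std::vector<int> &Mask,
                                          unsigned EltBytes) {
  const unsigned MaxElts = Mask.size();
  for (unsigned i = 0; i != MaxElts; ++i) {
    // A rotation may only pull elements from the first input vector.
    if (Mask[i] < 0 || Mask[i] >= int(MaxElts))
      return std::nullopt;
    // Each element must follow its predecessor, wrapping MaxElts-1 -> 0.
    if (i > 0 && Mask[i] != int((Mask[i - 1] + 1) % MaxElts))
      return std::nullopt;
  }
  // Mask[0] is the element landing in slot 0, so this is a rotate-left by
  // Mask[0] elements; scale to bytes for the ROTQBY/ROTQBYI operand.
  return unsigned(Mask[0]) * EltBytes;
}

// Example: matchVectorRotate({1, 2, 3, 0}, 4) == 4, i.e. a v4f32 shuffle
// <1,2,3,0> becomes a rotate-left of the quadword by 4 bytes.
```

The patch folds the same test into the existing mask-scanning loop via PrevElt/V0Elt and computes the amount as (MaxElts - V0Elt) * EltVT.getSizeInBits()/8, which matches Mask[0] * EltBytes above for any non-trivial rotation, since V0Elt is the mask position that reads element 0.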
Diffstat (limited to 'lib/Target/CellSPU')
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp |  1
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp          | 77
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td              |  4
3 files changed, 59 insertions, 23 deletions
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 31b7e60..94d8cd5 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -221,7 +221,6 @@ namespace {
     void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo) {
       printOp(MI->getOperand(OpNo));
-      O << "-.";
     }
 
     void printSymbolHi(const MachineInstr *MI, unsigned OpNo) {
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 17d88a9..2b35e76 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -548,7 +548,6 @@ AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
     prefSlotOffs = (int) (alignOffs & 0xf);
     prefSlotOffs -= vtm->prefslot_byte;
-    basePtr = DAG.getRegister(SPU::R1, VT);
   } else {
     alignOffs = 0;
     prefSlotOffs = -vtm->prefslot_byte;
@@ -1127,6 +1126,8 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
         ArgOffset += StackSlotSize;
       }
       break;
+    case MVT::v2i64:
+    case MVT::v2f64:
     case MVT::v4f32:
     case MVT::v4i32:
     case MVT::v8i16:
@@ -1255,6 +1256,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
       NumResults = 1;
       break;
     case MVT::v2f64:
+    case MVT::v2i64:
     case MVT::v4f32:
     case MVT::v4i32:
     case MVT::v8i16:
@@ -1747,38 +1749,64 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   // If we have a single element being moved from V1 to V2, this can be handled
   // using the C*[DX] compute mask instructions, but the vector elements have
   // to be monotonically increasing with one exception element.
-  MVT EltVT = V1.getValueType().getVectorElementType();
+  MVT VecVT = V1.getValueType();
+  MVT EltVT = VecVT.getVectorElementType();
   unsigned EltsFromV2 = 0;
   unsigned V2Elt = 0;
   unsigned V2EltIdx0 = 0;
   unsigned CurrElt = 0;
+  unsigned MaxElts = VecVT.getVectorNumElements();
+  unsigned PrevElt = 0;
+  unsigned V0Elt = 0;
   bool monotonic = true;
-  if (EltVT == MVT::i8)
+  bool rotate = true;
+
+  if (EltVT == MVT::i8) {
     V2EltIdx0 = 16;
-  else if (EltVT == MVT::i16)
+  } else if (EltVT == MVT::i16) {
     V2EltIdx0 = 8;
-  else if (EltVT == MVT::i32)
+  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
     V2EltIdx0 = 4;
-  else
+  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+    V2EltIdx0 = 2;
+  } else
     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
 
-  for (unsigned i = 0, e = PermMask.getNumOperands();
-       EltsFromV2 <= 1 && monotonic && i != e;
-       ++i) {
-    unsigned SrcElt;
-    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
-      SrcElt = 0;
-    else
-      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
+  for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
+    if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
+      unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
 
-    if (SrcElt >= V2EltIdx0) {
-      ++EltsFromV2;
-      V2Elt = (V2EltIdx0 - SrcElt) << 2;
-    } else if (CurrElt != SrcElt) {
-      monotonic = false;
-    }
+      if (monotonic) {
+        if (SrcElt >= V2EltIdx0) {
+          if (1 >= (++EltsFromV2)) {
+            V2Elt = (V2EltIdx0 - SrcElt) << 2;
+          }
+        } else if (CurrElt != SrcElt) {
+          monotonic = false;
+        }
 
-    ++CurrElt;
+        ++CurrElt;
+      }
+
+      if (rotate) {
+        if (PrevElt > 0 && SrcElt < MaxElts) {
+          if ((PrevElt == SrcElt - 1)
+              || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+            PrevElt = SrcElt;
+            if (SrcElt == 0)
+              V0Elt = i;
+          } else {
+            rotate = false;
+          }
+        } else if (PrevElt == 0) {
+          // First time through, need to keep track of previous element
+          PrevElt = SrcElt;
+        } else {
+          // This isn't a rotation, takes elements from vector 2
+          rotate = false;
+        }
+      }
+    }
   }
 
   if (EltsFromV2 == 1 && monotonic) {
@@ -1797,6 +1825,11 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
                     DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
     // Use shuffle mask in SHUFB synthetic instruction:
     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
+  } else if (rotate) {
+    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
+
+    return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
+                       V1, DAG.getConstant(rotamt, MVT::i16));
   } else {
     // Convert the SHUFFLE_VECTOR mask's input element units to the
     // actual bytes.
@@ -2127,7 +2160,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
     SDValue ShufMask[4];
     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
-      unsigned bidx = i / 4;
+      unsigned bidx = i * 4;
       unsigned int bits = ((ShufBytes[bidx] << 24) |
                            (ShufBytes[bidx+1] << 16) |
                            (ShufBytes[bidx+2] << 8) |
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 03f79d3..678f8e9 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -2124,7 +2124,9 @@ multiclass RotateQuadLeftByBytes
   def v16i8: ROTQBYVecInst<v16i8>;
   def v8i16: ROTQBYVecInst<v8i16>;
   def v4i32: ROTQBYVecInst<v4i32>;
+  def v4f32: ROTQBYVecInst<v4f32>;
   def v2i64: ROTQBYVecInst<v2i64>;
+  def v2f64: ROTQBYVecInst<v2f64>;
 }
 
 defm ROTQBY: RotateQuadLeftByBytes;
@@ -2147,7 +2149,9 @@ multiclass RotateQuadByBytesImm
   def v16i8: ROTQBYIVecInst<v16i8>;
   def v8i16: ROTQBYIVecInst<v8i16>;
   def v4i32: ROTQBYIVecInst<v4i32>;
+  def v4f32: ROTQBYIVecInst<v4f32>;
   def v2i64: ROTQBYIVecInst<v2i64>;
+  def vfi64: ROTQBYIVecInst<v2f64>;
 }
 
 defm ROTQBYI: RotateQuadByBytesImm;
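One of the "interesting but obscure bugs" is the one-character change in LowerEXTRACT_VECTOR_ELT above: each of the four 32-bit words of the shuffle-control mask must be packed from its own group of four bytes of ShufBytes, but the old bidx = i / 4 evaluates to 0 for all of i = 0..3, so every word repacked ShufBytes[0..3]. A self-contained reproduction of the corrected packing, with placeholder byte values standing in for the real shuffle-control bytes:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // 16 per-byte shuffle-control values, as LowerEXTRACT_VECTOR_ELT builds them
  // (placeholder values here, chosen so each packed word is recognizable).
  uint8_t ShufBytes[16];
  for (unsigned b = 0; b != 16; ++b)
    ShufBytes[b] = uint8_t(0x10 + b);

  uint32_t ShufMask[4];
  for (unsigned i = 0; i != 4; ++i) {
    unsigned bidx = i * 4; // the fix: word i packs bytes 4*i .. 4*i+3
    ShufMask[i] = (uint32_t(ShufBytes[bidx]) << 24) |
                  (uint32_t(ShufBytes[bidx + 1]) << 16) |
                  (uint32_t(ShufBytes[bidx + 2]) << 8) |
                  uint32_t(ShufBytes[bidx + 3]);
    printf("ShufMask[%u] = 0x%08x\n", i, ShufMask[i]);
  }
  return 0;
}
```

This prints 0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f; with the old i / 4 indexing it would print 0x10111213 four times, yielding four identical mask words.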