diff options
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 38 | ||||
-rw-r--r-- | test/CodeGen/ARM/vector-DAGCombine.ll | 27 |
2 files changed, 64 insertions, 1 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4c9d2da..9d7a379 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4294,6 +4294,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return true; } +/// \return true if this is a reverse operation on an vector. +static bool isReverseMask(ArrayRef<int> M, EVT VT) { + unsigned NumElts = VT.getVectorNumElements(); + // Make sure the mask has the right size. + if (NumElts != M.size()) + return false; + + // Look for <15, ..., 3, -1, 1, 0>. + for (unsigned i = 0; i != NumElts; ++i) + if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) + return false; + + return true; +} + // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. @@ -4689,7 +4704,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isVZIPMask(M, VT, WhichResult) || isVTRN_v_undef_Mask(M, VT, WhichResult) || isVUZP_v_undef_Mask(M, VT, WhichResult) || - isVZIP_v_undef_Mask(M, VT, WhichResult)); + isVZIP_v_undef_Mask(M, VT, WhichResult) || + ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit @@ -4793,6 +4809,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, &VTBLMask[0], 8)); } +static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, + SelectionDAG &DAG) { + DebugLoc DL = Op.getDebugLoc(); + SDValue OpLHS = Op.getOperand(0); + EVT VT = OpLHS.getValueType(); + + assert((VT == MVT::v8i16 || VT == MVT::v16i8) && + "Expect an v8i16/v16i8 type"); + OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); + // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, + // extract the first 8 bytes into the top double word and the last 8 bytes + // into the bottom double word. The v8i16 case is similar. + unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; + return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, + DAG.getConstant(ExtractNum, MVT::i32)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4930,6 +4963,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) + return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); + if (VT == MVT::v8i8) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); if (NewOp.getNode()) diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll index a38a0fe..42964de 100644 --- a/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/test/CodeGen/ARM/vector-DAGCombine.ll @@ -133,3 +133,30 @@ define i16 @foldBuildVectors() { %3 = extractelement <8 x i16> %2, i32 0 ret i16 %3 } + +; Test that we are generating vrev and vext for reverse shuffles of v8i16 +; shuffles. +; CHECK: reverse_v8i16 +define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) { + %v0 = load <8 x i16>* %loadaddr + ; CHECK: vrev64.16 + ; CHECK: vext.16 + %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef, + <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + store <8 x i16> %v1, <8 x i16>* %storeaddr + ret void +} + +; Test that we are generating vrev and vext for reverse shuffles of v16i8 +; shuffles. +; CHECK: reverse_v16i8 +define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) { + %v0 = load <16 x i8>* %loadaddr + ; CHECK: vrev64.8 + ; CHECK: vext.8 + %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef, + <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, + i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + store <16 x i8> %v1, <16 x i8>* %storeaddr + ret void +} |