diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 19 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetTransformInfo.cpp | 4 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/cast.ll | 4 | ||||
-rwxr-xr-x | test/CodeGen/X86/avx-sext.ll | 21 |
4 files changed, 42 insertions, 6 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 10cf13f..5c2f651 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11827,8 +11827,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, // fall through case MVT::v4i32: case MVT::v8i16: { - SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, - Op.getOperand(0), ShAmt, DAG); + // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && + Op00.getOpcode() == ISD::VECTOR_SHUFFLE) + Tmp1 = LowerVectorIntExtend(Op00, DAG); + if (Tmp1.getNode()) { + SDValue Tmp1Op0 = Tmp1.getOperand(0); + assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && + "This optimization is invalid without a VZEXT."); + return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. + Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG); return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG); } } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index be2a997..777ef50 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -257,8 +257,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index bacc778..c8d0f6f 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -44,9 +44,9 @@ define i32 @zext_sext(<8 x i1> %in) { %B = zext <8 x i16> undef to <8 x i32> ;CHECK: cost of 1 {{.*}} sext %C = sext <4 x i32> undef to <4 x i64> - ;CHECK: cost of 8 {{.*}} sext + ;CHECK: cost of 6 {{.*}} sext %C1 = sext <4 x i8> undef to <4 x i64> - ;CHECK: cost of 8 {{.*}} sext + ;CHECK: cost of 6 {{.*}} sext %C2 = sext <4 x i16> undef to <4 x i64> ;CHECK: cost of 1 {{.*}} zext diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll index 7ae0d36..b9c7000 100755 --- a/test/CodeGen/X86/avx-sext.ll +++ b/test/CodeGen/X86/avx-sext.ll @@ -165,3 +165,24 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) { ret <4 x i64> %extmask } +; AVX: sext_4i8_to_4i64 +; AVX: vpmovsxbd +; AVX: vpmovsxdq +; AVX: vpmovsxdq +; AVX: ret +define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { + %X = load <4 x i8>* %ptr + %Y = sext <4 x i8> %X to <4 x i64> + ret <4 x i64>%Y +} + +; AVX: sext_4i16_to_4i64 +; AVX: vpmovsxwd +; AVX: vpmovsxdq +; AVX: vpmovsxdq +; AVX: ret +define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { + %X = load <4 x i16>* %ptr + %Y = sext <4 x i16> %X to <4 x i64> + ret <4 x i64>%Y +} |