diff options
author | Rafael Espindola <rafael.espindola@gmail.com> | 2013-05-31 14:27:15 +0000 |
---|---|---|
committer | Rafael Espindola <rafael.espindola@gmail.com> | 2013-05-31 14:27:15 +0000 |
commit | 4f3d7eea048c5d665436b8bd7a59739bcba5ec0b (patch) | |
tree | 0eada9be29a965d98abb7bf50274ba9ffa99c7b2 | |
parent | e93c701cac2ac62bcd390b978604da76be9967d0 (diff) | |
download | external_llvm-4f3d7eea048c5d665436b8bd7a59739bcba5ec0b.zip external_llvm-4f3d7eea048c5d665436b8bd7a59739bcba5ec0b.tar.gz external_llvm-4f3d7eea048c5d665436b8bd7a59739bcba5ec0b.tar.bz2 |
Simplify multiplications by vectors whose elements are powers of 2.
Patch by Andrea Di Biagio.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183005 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 64 | ||||
-rw-r--r-- | test/Transforms/InstCombine/vector-mul.ll | 408 |
2 files changed, 456 insertions, 16 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2628f4b..2761bc2 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -95,6 +95,25 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { return MulExt.slt(Min) || MulExt.sgt(Max); } +/// \brief A helper routine of InstCombiner::visitMul(). +/// +/// If C is a vector of known powers of 2, then this function returns +/// a new vector obtained from C replacing each element with its logBase2. +/// Return a null pointer otherwise. +static Constant *getLogBase2Vector(ConstantDataVector *CV) { + const APInt *IVal; + SmallVector<Constant *, 4> Elts; + + for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) { + Constant *Elt = CV->getElementAsConstant(I); + if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2()) + return 0; + Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2())); + } + + return ConstantVector::get(Elts); +} + Instruction *InstCombiner::visitMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -108,24 +127,37 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (match(Op1, m_AllOnes())) // X * -1 == 0 - X return BinaryOperator::CreateNeg(Op0, I.getName()); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - - // ((X << C1)*C2) == (X * (C2 << C1)) - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) - if (SI->getOpcode() == Instruction::Shl) - if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) - return BinaryOperator::CreateMul(SI->getOperand(0), - ConstantExpr::getShl(CI, ShOp)); - - const APInt &Val = CI->getValue(); - if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C - Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2()); - BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst); - if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap(); - if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); - return Shl; + // Also allow combining multiply instructions on vectors. + { + Value *NewOp; + Constant *C1, *C2; + const APInt *IVal; + if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_Constant(C2)), + m_Constant(C1))) && + match(C1, m_APInt(IVal))) + // ((X << C1)*C2) == (X * (C2 << C1)) + return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2)); + + if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) { + Constant *NewCst = 0; + if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2()) + // Replace X*(2^C) with X << C, where C is either a scalar or a splat. + NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2()); + else if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(C1)) + // Replace X*(2^C) with X << C, where C is a vector of known + // constant powers of 2. + NewCst = getLogBase2Vector(CV); + + if (NewCst) { + BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst); + if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap(); + if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); + return Shl; + } } + } + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { // Canonicalize (X+C1)*CI -> X*CI+C1*CI. { Value *X; ConstantInt *C1; if (Op0->hasOneUse() && diff --git a/test/Transforms/InstCombine/vector-mul.ll b/test/Transforms/InstCombine/vector-mul.ll new file mode 100644 index 0000000..4e4417f --- /dev/null +++ b/test/Transforms/InstCombine/vector-mul.ll @@ -0,0 +1,408 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Check that instcombine rewrites multiply by a vector +; of known constant power-of-2 elements with vector shift. + +define <4 x i8> @Zero_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 0, i8 0, i8 0, i8 0> + ret <4 x i8> %mul +} + +; CHECK: @Zero_i8 +; CHECK: ret <4 x i8> zeroinitializer + +define <4 x i8> @Identity_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 1, i8 1, i8 1, i8 1> + ret <4 x i8> %mul +} + +; CHECK: @Identity_i8 +; CHECK: ret <4 x i8> %InVec + +define <4 x i8> @AddToSelf_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2> + ret <4 x i8> %mul +} + +; CHECK: @AddToSelf_i8 +; CHECK: shl <4 x i8> %InVec, <i8 1, i8 1, i8 1, i8 1> +; CHECK: ret + +define <4 x i8> @SplatPow2Test1_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 4, i8 4> + ret <4 x i8> %mul +} + +; CHECK: @SplatPow2Test1_i8 +; CHECK: shl <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2> +; CHECK: ret + +define <4 x i8> @SplatPow2Test2_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 8, i8 8, i8 8, i8 8> + ret <4 x i8> %mul +} + +; CHECK: @SplatPow2Test2_i8 +; CHECK: shl <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3> +; CHECK: ret + +define <4 x i8> @MulTest1_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 1, i8 2, i8 4, i8 8> + ret <4 x i8> %mul +} + +; CHECK: @MulTest1_i8 +; CHECK: shl <4 x i8> %InVec, <i8 0, i8 1, i8 2, i8 3> +; CHECK: ret + +define <4 x i8> @MulTest2_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3> + ret <4 x i8> %mul +} + +; CHECK: @MulTest2_i8 +; CHECK: mul <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3> +; CHECK: ret + +define <4 x i8> @MulTest3_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 2, i8 2> + ret <4 x i8> %mul +} + +; CHECK: @MulTest3_i8 +; CHECK: shl <4 x i8> %InVec, <i8 2, i8 2, i8 1, i8 1> +; CHECK: ret + + +define <4 x i8> @MulTest4_i8(<4 x i8> %InVec) { +entry: + %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 0, i8 1> + ret <4 x i8> %mul +} + +; CHECK: @MulTest4_i8 +; CHECK: mul <4 x i8> %InVec, <i8 4, i8 4, i8 0, i8 1> +; CHECK: ret + +define <4 x i16> @Zero_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0> + ret <4 x i16> %mul +} + +; CHECK: @Zero_i16 +; CHECK: ret <4 x i16> zeroinitializer + +define <4 x i16> @Identity_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1> + ret <4 x i16> %mul +} + +; CHECK: @Identity_i16 +; CHECK: ret <4 x i16> %InVec + +define <4 x i16> @AddToSelf_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2> + ret <4 x i16> %mul +} + +; CHECK: @AddToSelf_i16 +; CHECK: shl <4 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1> +; CHECK: ret + +define <4 x i16> @SplatPow2Test1_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 4, i16 4> + ret <4 x i16> %mul +} + +; CHECK: @SplatPow2Test1_i16 +; CHECK: shl <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2> +; CHECK: ret + +define <4 x i16> @SplatPow2Test2_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 8, i16 8, i16 8, i16 8> + ret <4 x i16> %mul +} + +; CHECK: @SplatPow2Test2_i16 +; CHECK: shl <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3> +; CHECK: ret + +define <4 x i16> @MulTest1_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 1, i16 2, i16 4, i16 8> + ret <4 x i16> %mul +} + +; CHECK: @MulTest1_i16 +; CHECK: shl <4 x i16> %InVec, <i16 0, i16 1, i16 2, i16 3> +; CHECK: ret + +define <4 x i16> @MulTest2_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3> + ret <4 x i16> %mul +} + +; CHECK: @MulTest2_i16 +; CHECK: mul <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3> +; CHECK: ret + +define <4 x i16> @MulTest3_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 2, i16 2> + ret <4 x i16> %mul +} + +; CHECK: @MulTest3_i16 +; CHECK: shl <4 x i16> %InVec, <i16 2, i16 2, i16 1, i16 1> +; CHECK: ret + +define <4 x i16> @MulTest4_i16(<4 x i16> %InVec) { +entry: + %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 0, i16 2> + ret <4 x i16> %mul +} + +; CHECK: @MulTest4_i16 +; CHECK: mul <4 x i16> %InVec, <i16 4, i16 4, i16 0, i16 2> +; CHECK: ret + +define <4 x i32> @Zero_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0> + ret <4 x i32> %mul +} + +; CHECK: @Zero_i32 +; CHECK: ret <4 x i32> zeroinitializer + +define <4 x i32> @Identity_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %mul +} + +; CHECK: @Identity_i32 +; CHECK: ret <4 x i32> %InVec + +define <4 x i32> @AddToSelf_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2> + ret <4 x i32> %mul +} + +; CHECK: @AddToSelf_i32 +; CHECK: shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1> +; CHECK: ret + + +define <4 x i32> @SplatPow2Test1_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 4, i32 4> + ret <4 x i32> %mul +} + +; CHECK: @SplatPow2Test1_i32 +; CHECK: shl <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2> +; CHECK: ret + +define <4 x i32> @SplatPow2Test2_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 8, i32 8, i32 8, i32 8> + ret <4 x i32> %mul +} + +; CHECK: @SplatPow2Test2_i32 +; CHECK: shl <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3> +; CHECK: ret + +define <4 x i32> @MulTest1_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 1, i32 2, i32 4, i32 8> + ret <4 x i32> %mul +} + +; CHECK: @MulTest1_i32 +; CHECK: shl <4 x i32> %InVec, <i32 0, i32 1, i32 2, i32 3> +; CHECK: ret + +define <4 x i32> @MulTest2_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3> + ret <4 x i32> %mul +} + +; CHECK: @MulTest2_i32 +; CHECK: mul <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3> +; CHECK: ret + +define <4 x i32> @MulTest3_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 2, i32 2> + ret <4 x i32> %mul +} + +; CHECK: @MulTest3_i32 +; CHECK: shl <4 x i32> %InVec, <i32 2, i32 2, i32 1, i32 1> +; CHECK: ret + + +define <4 x i32> @MulTest4_i32(<4 x i32> %InVec) { +entry: + %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 0, i32 1> + ret <4 x i32> %mul +} + +; CHECK: @MulTest4_i32 +; CHECK: mul <4 x i32> %InVec, <i32 4, i32 4, i32 0, i32 1> +; CHECK: ret + +define <4 x i64> @Zero_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0> + ret <4 x i64> %mul +} + +; CHECK: @Zero_i64 +; CHECK: ret <4 x i64> zeroinitializer + +define <4 x i64> @Identity_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1> + ret <4 x i64> %mul +} + +; CHECK: @Identity_i64 +; CHECK: ret <4 x i64> %InVec + +define <4 x i64> @AddToSelf_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2> + ret <4 x i64> %mul +} + +; CHECK: @AddToSelf_i64 +; CHECK: shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1> +; CHECK: ret + +define <4 x i64> @SplatPow2Test1_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 4, i64 4> + ret <4 x i64> %mul +} + +; CHECK: @SplatPow2Test1_i64 +; CHECK: shl <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2> +; CHECK: ret + +define <4 x i64> @SplatPow2Test2_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 8, i64 8, i64 8, i64 8> + ret <4 x i64> %mul +} + +; CHECK: @SplatPow2Test2_i64 +; CHECK: shl <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3> +; CHECK: ret + +define <4 x i64> @MulTest1_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 1, i64 2, i64 4, i64 8> + ret <4 x i64> %mul +} + +; CHECK: @MulTest1_i64 +; CHECK: shl <4 x i64> %InVec, <i64 0, i64 1, i64 2, i64 3> +; CHECK: ret + +define <4 x i64> @MulTest2_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3> + ret <4 x i64> %mul +} + +; CHECK: @MulTest2_i64 +; CHECK: mul <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3> +; CHECK: ret + +define <4 x i64> @MulTest3_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 2, i64 2> + ret <4 x i64> %mul +} + +; CHECK: @MulTest3_i64 +; CHECK: shl <4 x i64> %InVec, <i64 2, i64 2, i64 1, i64 1> +; CHECK: ret + +define <4 x i64> @MulTest4_i64(<4 x i64> %InVec) { +entry: + %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 0, i64 1> + ret <4 x i64> %mul +} + +; CHECK: @MulTest4_i64 +; CHECK: mul <4 x i64> %InVec, <i64 4, i64 4, i64 0, i64 1> +; CHECK: ret + +; Test also that the following rewriting rule works with vectors +; of integers as well: +; ((X << C1)*C2) == (X * (C2 << C1)) + +define <4 x i8> @ShiftMulTest1(<4 x i8> %InVec) { +entry: + %shl = shl <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2> + %mul = mul <4 x i8> %shl, <i8 3, i8 3, i8 3, i8 3> + ret <4 x i8> %mul +} + +; CHECK: @ShiftMulTest1 +; CHECK: mul <4 x i8> %InVec, <i8 12, i8 12, i8 12, i8 12> +; CHECK: ret + +define <4 x i16> @ShiftMulTest2(<4 x i16> %InVec) { +entry: + %shl = shl <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2> + %mul = mul <4 x i16> %shl, <i16 3, i16 3, i16 3, i16 3> + ret <4 x i16> %mul +} + +; CHECK: @ShiftMulTest2 +; CHECK: mul <4 x i16> %InVec, <i16 12, i16 12, i16 12, i16 12> +; CHECK: ret + +define <4 x i32> @ShiftMulTest3(<4 x i32> %InVec) { +entry: + %shl = shl <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2> + %mul = mul <4 x i32> %shl, <i32 3, i32 3, i32 3, i32 3> + ret <4 x i32> %mul +} + +; CHECK: @ShiftMulTest3 +; CHECK: mul <4 x i32> %InVec, <i32 12, i32 12, i32 12, i32 12> +; CHECK: ret + +define <4 x i64> @ShiftMulTest4(<4 x i64> %InVec) { +entry: + %shl = shl <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2> + %mul = mul <4 x i64> %shl, <i64 3, i64 3, i64 3, i64 3> + ret <4 x i64> %mul +} + +; CHECK: @ShiftMulTest4 +; CHECK: mul <4 x i64> %InVec, <i64 12, i64 12, i64 12, i64 12> +; CHECK: ret + |