diff options
-rw-r--r-- | include/llvm/Constants.h | 5 | ||||
-rw-r--r-- | include/llvm/Instruction.h | 21 | ||||
-rw-r--r-- | lib/Transforms/Scalar/Reassociate.cpp | 229 | ||||
-rw-r--r-- | lib/VMCore/Constants.cpp | 20 | ||||
-rw-r--r-- | lib/VMCore/Instruction.cpp | 23 | ||||
-rw-r--r-- | test/Transforms/Reassociate/repeats.ll | 252 |
6 files changed, 525 insertions, 25 deletions
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h index 0abe17d..a3f5b95 100644 --- a/include/llvm/Constants.h +++ b/include/llvm/Constants.h @@ -917,6 +917,11 @@ public: return getLShr(C1, C2, true); } + /// getBinOpIdentity - Return the identity for the given binary operation, + /// i.e. a constant C such that X op C = X and C op X = X for every X. It + /// is an error to call this for an operation that doesn't have an identity. + static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty); + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h index 9c5ac44..a386d1d 100644 --- a/include/llvm/Instruction.h +++ b/include/llvm/Instruction.h @@ -215,6 +215,27 @@ public: bool isCommutative() const { return isCommutative(getOpcode()); } static bool isCommutative(unsigned op); + /// isIdempotent - Return true if the instruction is idempotent: + /// + /// Idempotent operators satisfy: x op x === x + /// + /// In LLVM, the And and Or operators are idempotent. + /// + bool isIdempotent() const { return isIdempotent(getOpcode()); } + static bool isIdempotent(unsigned op); + + /// isNilpotent - Return true if the instruction is nilpotent: + /// + /// Nilpotent operators satisfy: x op x === Id, + /// + /// where Id is the identity for the operator, i.e. a constant such that + /// x op Id === x and Id op x === x for all x. + /// + /// In LLVM, the Xor operator is nilpotent. + /// + bool isNilpotent() const { return isNilpotent(getOpcode()); } + static bool isNilpotent(unsigned op); + /// mayWriteToMemory - Return true if this instruction may modify memory. /// bool mayWriteToMemory() const; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 275d19a..50de946 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -143,7 +143,6 @@ namespace { Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder, SmallVectorImpl<Factor> &Factors); Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); - void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); Value *RemoveFactorFromExpression(Value *V, Value *Factor); void EraseInst(Instruction *I); void OptimizeInst(Instruction *I); @@ -251,10 +250,148 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { return Res; } +/// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda +/// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for +/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic. +/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every +/// even x in Bitwidth-bit arithmetic. +static unsigned CarmichaelShift(unsigned Bitwidth) { + if (Bitwidth < 3) + return Bitwidth - 1; + return Bitwidth - 2; +} + +/// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS', +/// reducing the combined weight using any special properties of the operation. +/// The existing weight LHS represents the computation X op X op ... op X where +/// X occurs LHS times. The combined weight represents X op X op ... op X with +/// X occurring LHS + RHS times. If op is "Xor" for example then the combined +/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even; +/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second. +static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { + // If we were working with infinite precision arithmetic then the combined + // weight would be LHS + RHS. But we are using finite precision arithmetic, + // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct + // for nilpotent operations and addition, but not for idempotent operations + // and multiplication), so it is important to correctly reduce the combined + // weight back into range if wrapping would be wrong. + + // If RHS is zero then the weight didn't change. + if (RHS.isMinValue()) + return; + // If LHS is zero then the combined weight is RHS. + if (LHS.isMinValue()) { + LHS = RHS; + return; + } + // From this point on we know that neither LHS nor RHS is zero. + + if (Instruction::isIdempotent(Opcode)) { + // Idempotent means X op X === X, so any non-zero weight is equivalent to a + // weight of 1. Keeping weights at zero or one also means that wrapping is + // not a problem. + assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); + return; // Return a weight of 1. + } + if (Instruction::isNilpotent(Opcode)) { + // Nilpotent means X op X === 0, so reduce weights modulo 2. + assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); + LHS = 0; // 1 + 1 === 0 modulo 2. + return; + } + if (Opcode == Instruction::Add) { + // TODO: Reduce the weight by exploiting nsw/nuw? + LHS += RHS; + return; + } + + assert(Opcode == Instruction::Mul && "Unknown associative operation!"); + unsigned Bitwidth = LHS.getBitWidth(); + // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth + // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth + // bit number x, since either x is odd in which case x^CM = 1, or x is even in + // which case both x^W and x^(W - CM) are zero. By subtracting off multiples + // of CM like this weights can always be reduced to the range [0, CM+Bitwidth) + // which by a happy accident means that they can always be represented using + // Bitwidth bits. + // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than + // the Carmichael number). + if (Bitwidth > 3) { + /// CM - The value of Carmichael's lambda function. + APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth)); + // Any weight W >= Threshold can be replaced with W - CM. + APInt Threshold = CM + Bitwidth; + assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!"); + // For Bitwidth 4 or more the following sum does not overflow. + LHS += RHS; + while (LHS.uge(Threshold)) + LHS -= CM; + } else { + // To avoid problems with overflow do everything the same as above but using + // a larger type. + unsigned CM = 1U << CarmichaelShift(Bitwidth); + unsigned Threshold = CM + Bitwidth; + assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold && + "Weights not reduced!"); + unsigned Total = LHS.getZExtValue() + RHS.getZExtValue(); + while (Total >= Threshold) + Total -= CM; + LHS = Total; + } +} + +/// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C +/// is repeated Weight times. +static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C, + APInt Weight) { + // For addition the result can be efficiently computed as the product of the + // constant and the weight. + if (Opcode == Instruction::Add) + return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight)); + + // The weight might be huge, so compute by repeated squaring to ensure that + // compile time is proportional to the logarithm of the weight. + Constant *Result = 0; + Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc. + // Visit the bits in Weight. + while (Weight != 0) { + // If the current bit in Weight is non-zero do Result = Result op Power. + if (Weight[0]) + Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power; + // Move on to the next bit if any more are non-zero. + Weight = Weight.lshr(1); + if (Weight.isMinValue()) + break; + // Square the power. + Power = ConstantExpr::get(Opcode, Power, Power); + } + + assert(Result && "Only positive weights supported!"); + return Result; +} + +typedef std::pair<Value*, APInt> RepeatedValue; + /// LinearizeExprTree - Given an associative binary expression, return the leaf -/// nodes in Ops. The original expression is the same as Ops[0] op ... Ops[N]. -/// Note that a node may occur multiple times in Ops, but if so all occurrences -/// are consecutive in the vector. +/// nodes in Ops along with their weights (how many times the leaf occurs). The +/// original expression is the same as +/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times +/// op +/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times +/// op +/// ... +/// op +/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times +/// +/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and +/// they are all non-constant except possibly for the last one, which if it is +/// constant will have weight one (Ops[N].second === 1). +/// +/// This routine may modify the function, in which case it returns 'true'. The +/// changes it makes may well be destructive, changing the value computed by 'I' +/// to something completely different. Thus if the routine returns 'true' then +/// you MUST either replace I with a new expression computed from the Ops array, +/// or use RewriteExprTree to put the values back in. /// /// A leaf node is either not a binary operation of the same kind as the root /// node 'I' (i.e. is not a binary operator at all, or is, but with a different @@ -276,7 +413,7 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { /// + * | F, G /// /// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in -/// that order) C, E, F, F, G, G. +/// that order) (C, 1), (E, 1), (F, 2), (G, 2). /// /// The expression is maximal: if some instruction is a binary operator of the /// same kind as 'I', and all of its uses are non-leaf nodes of the expression, @@ -287,7 +424,8 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { /// order to ensure that every non-root node in the expression has *exactly one* /// use by a non-leaf node of the expression. This destruction means that the /// caller MUST either replace 'I' with a new expression or use something like -/// RewriteExprTree to put the values back in. +/// RewriteExprTree to put the values back in if the routine indicates that it +/// made a change by returning 'true'. /// /// In the above example either the right operand of A or the left operand of B /// will be replaced by undef. If it is B's operand then this gives: @@ -310,9 +448,14 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { /// of the expression) if it can turn them into binary operators of the right /// type and thus make the expression bigger. -void Reassociate::LinearizeExprTree(BinaryOperator *I, - SmallVectorImpl<ValueEntry> &Ops) { +static bool LinearizeExprTree(BinaryOperator *I, + SmallVectorImpl<RepeatedValue> &Ops) { DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); + unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits(); + unsigned Opcode = I->getOpcode(); + assert(Instruction::isAssociative(Opcode) && + Instruction::isCommutative(Opcode) && + "Expected an associative and commutative operation!"); // Visit all operands of the expression, keeping track of their weight (the // number of paths from the expression root to the operand, or if you like @@ -324,9 +467,9 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, // with their weights, representing a certain number of paths to the operator. // If an operator occurs in the worklist multiple times then we found multiple // ways to get to it. - SmallVector<std::pair<BinaryOperator*, unsigned>, 8> Worklist; // (Op, Weight) - Worklist.push_back(std::make_pair(I, 1)); - unsigned Opcode = I->getOpcode(); + SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight) + Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1))); + bool MadeChange = false; // Leaves of the expression are values that either aren't the right kind of // operation (eg: a constant, or a multiply in an add tree), or are, but have @@ -343,7 +486,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, // Leaves - Keeps track of the set of putative leaves as well as the number of // paths to each leaf seen so far. - typedef SmallMap<Value*, unsigned, 8> LeafMap; + typedef SmallMap<Value*, APInt, 8> LeafMap; LeafMap Leaves; // Leaf -> Total weight so far. SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order. @@ -351,13 +494,12 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme. #endif while (!Worklist.empty()) { - std::pair<BinaryOperator*, unsigned> P = Worklist.pop_back_val(); + std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val(); I = P.first; // We examine the operands of this binary operator. - assert(P.second >= 1 && "No paths to here, so how did we get here?!"); for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands. Value *Op = I->getOperand(OpIdx); - unsigned Weight = P.second; // Number of paths to this operand. + APInt Weight = P.second; // Number of paths to this operand. DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n"); assert(!Op->use_empty() && "No uses, so how did we get to it?!"); @@ -389,7 +531,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, assert(Visited.count(Op) && "In leaf map but not visited!"); // Update the number of paths to the leaf. - It->second += Weight; + IncorporateWeight(It->second, Weight, Opcode); // The leaf already has one use from inside the expression. As we want // exactly one such use, drop this new use of the leaf. @@ -450,21 +592,44 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, // The leaves, repeated according to their weights, represent the linearized // form of the expression. + Constant *Cst = 0; // Accumulate constants here. for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) { Value *V = LeafOrder[i]; LeafMap::iterator It = Leaves.find(V); if (It == Leaves.end()) - // Leaf already output, or node initially thought to be a leaf wasn't. + // Node initially thought to be a leaf wasn't. continue; assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!"); - unsigned Weight = It->second; - assert(Weight > 0 && "No paths to this value!"); - // FIXME: Rather than repeating values Weight times, use a vector of - // (ValueEntry, multiplicity) pairs. - Ops.append(Weight, ValueEntry(getRank(V), V)); + APInt Weight = It->second; + if (Weight.isMinValue()) + // Leaf already output or weight reduction eliminated it. + continue; // Ensure the leaf is only output once. - Leaves.erase(It); + It->second = 0; + // Glob all constants together into Cst. + if (Constant *C = dyn_cast<Constant>(V)) { + C = EvaluateRepeatedConstant(Opcode, C, Weight); + Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C; + continue; + } + // Add non-constant + Ops.push_back(std::make_pair(V, Weight)); + } + + // Add any constants back into Ops, all globbed together and reduced to having + // weight 1 for the convenience of users. + if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) + Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1))); + + // For nilpotent operations or addition there may be no operands, for example + // because the expression was "X xor X" or consisted of 2^Bitwidth additions: + // in both cases the weight reduces to 0 causing the value to be skipped. + if (Ops.empty()) { + Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); + Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1))); } + + return MadeChange; } // RewriteExprTree - Now that the operands for this expression tree are @@ -775,8 +940,15 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); if (!BO) return 0; + SmallVector<RepeatedValue, 8> Tree; + MadeChange |= LinearizeExprTree(BO, Tree); SmallVector<ValueEntry, 8> Factors; - LinearizeExprTree(BO, Factors); + Factors.reserve(Tree.size()); + for (unsigned i = 0, e = Tree.size(); i != e; ++i) { + RepeatedValue E = Tree[i]; + Factors.append(E.second.getZExtValue(), + ValueEntry(getRank(E.first), E.first)); + } bool FoundFactor = false; bool NeedsNegate = false; @@ -1439,8 +1611,15 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. + SmallVector<RepeatedValue, 8> Tree; + MadeChange |= LinearizeExprTree(I, Tree); SmallVector<ValueEntry, 8> Ops; - LinearizeExprTree(I, Ops); + Ops.reserve(Tree.size()); + for (unsigned i = 0, e = Tree.size(); i != e; ++i) { + RepeatedValue E = Tree[i]; + Ops.append(E.second.getZExtValue(), + ValueEntry(getRank(E.first), E.first)); + } DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 6dbc144..17bad97 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -2007,6 +2007,26 @@ Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) { isExact ? PossiblyExactOperator::IsExact : 0); } +/// getBinOpIdentity - Return the identity for the given binary operation, +/// i.e. a constant C such that X op C = X and C op X = X for every X. It +/// is an error to call this for an operation that doesn't have an identity. +Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) { + switch (Opcode) { + default: + llvm_unreachable("Not a binary operation with identity"); + case Instruction::Add: + case Instruction::Or: + case Instruction::Xor: + return Constant::getNullValue(Ty); + + case Instruction::Mul: + return ConstantInt::get(Ty, 1); + + case Instruction::And: + return Constant::getAllOnesValue(Ty); + } +} + // destroyConstant - Remove the constant from the constant table... // void ConstantExpr::destroyConstant() { diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index c1d6387..faa99db 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -395,6 +395,29 @@ bool Instruction::isCommutative(unsigned op) { } } +/// isIdempotent - Return true if the instruction is idempotent: +/// +/// Idempotent operators satisfy: x op x === x +/// +/// In LLVM, the And and Or operators are idempotent. +/// +bool Instruction::isIdempotent(unsigned Opcode) { + return Opcode == And || Opcode == Or; +} + +/// isNilpotent - Return true if the instruction is nilpotent: +/// +/// Nilpotent operators satisfy: x op x === Id, +/// +/// where Id is the identity for the operator, i.e. a constant such that +/// x op Id === x and Id op x === x for all x. +/// +/// In LLVM, the Xor operator is nilpotent. +/// +bool Instruction::isNilpotent(unsigned Opcode) { + return Opcode == Xor; +} + Instruction *Instruction::clone() const { Instruction *New = clone_impl(); New->SubclassOptionalData = SubclassOptionalData; diff --git a/test/Transforms/Reassociate/repeats.ll b/test/Transforms/Reassociate/repeats.ll new file mode 100644 index 0000000..6a02047 --- /dev/null +++ b/test/Transforms/Reassociate/repeats.ll @@ -0,0 +1,252 @@ +; RUN: opt < %s -reassociate -S | FileCheck %s + +; Tests involving repeated operations on the same value. + +define i8 @nilpotent(i8 %x) { +; CHECK: @nilpotent + %tmp = xor i8 %x, %x + ret i8 %tmp +; CHECK: ret i8 0 +} + +define i2 @idempotent(i2 %x) { +; CHECK: @idempotent + %tmp1 = and i2 %x, %x + %tmp2 = and i2 %tmp1, %x + %tmp3 = and i2 %tmp2, %x + ret i2 %tmp3 +; CHECK: ret i2 %x +} + +define i2 @add(i2 %x) { +; CHECK: @add + %tmp1 = add i2 %x, %x + %tmp2 = add i2 %tmp1, %x + %tmp3 = add i2 %tmp2, %x + ret i2 %tmp3 +; CHECK: ret i2 0 +} + +define i2 @cst_add() { +; CHECK: @cst_add + %tmp1 = add i2 1, 1 + %tmp2 = add i2 %tmp1, 1 + ret i2 %tmp2 +; CHECK: ret i2 -1 +} + +define i8 @cst_mul() { +; CHECK: @cst_mul + %tmp1 = mul i8 3, 3 + %tmp2 = mul i8 %tmp1, 3 + %tmp3 = mul i8 %tmp2, 3 + %tmp4 = mul i8 %tmp3, 3 + ret i8 %tmp4 +; CHECK: ret i8 -13 +} + +define i3 @foo3x5(i3 %x) { +; Can be done with two multiplies. +; CHECK: @foo3x5 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i3 %x, %x + %tmp2 = mul i3 %tmp1, %x + %tmp3 = mul i3 %tmp2, %x + %tmp4 = mul i3 %tmp3, %x + ret i3 %tmp4 +} + +define i3 @foo3x6(i3 %x) { +; Can be done with two multiplies. +; CHECK: @foo3x6 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i3 %x, %x + %tmp2 = mul i3 %tmp1, %x + %tmp3 = mul i3 %tmp2, %x + %tmp4 = mul i3 %tmp3, %x + %tmp5 = mul i3 %tmp4, %x + ret i3 %tmp5 +} + +define i3 @foo3x7(i3 %x) { +; Can be done with two multiplies. +; CHECK: @foo3x7 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i3 %x, %x + %tmp2 = mul i3 %tmp1, %x + %tmp3 = mul i3 %tmp2, %x + %tmp4 = mul i3 %tmp3, %x + %tmp5 = mul i3 %tmp4, %x + %tmp6 = mul i3 %tmp5, %x + ret i3 %tmp6 +} + +define i4 @foo4x8(i4 %x) { +; Can be done with two multiplies. +; CHECK: @foo4x8 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i4 %x, %x + %tmp2 = mul i4 %tmp1, %x + %tmp3 = mul i4 %tmp2, %x + %tmp4 = mul i4 %tmp3, %x + %tmp5 = mul i4 %tmp4, %x + %tmp6 = mul i4 %tmp5, %x + %tmp7 = mul i4 %tmp6, %x + ret i4 %tmp7 +} + +define i4 @foo4x9(i4 %x) { +; Can be done with three multiplies. +; CHECK: @foo4x9 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i4 %x, %x + %tmp2 = mul i4 %tmp1, %x + %tmp3 = mul i4 %tmp2, %x + %tmp4 = mul i4 %tmp3, %x + %tmp5 = mul i4 %tmp4, %x + %tmp6 = mul i4 %tmp5, %x + %tmp7 = mul i4 %tmp6, %x + %tmp8 = mul i4 %tmp7, %x + ret i4 %tmp8 +} + +define i4 @foo4x10(i4 %x) { +; Can be done with three multiplies. +; CHECK: @foo4x10 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i4 %x, %x + %tmp2 = mul i4 %tmp1, %x + %tmp3 = mul i4 %tmp2, %x + %tmp4 = mul i4 %tmp3, %x + %tmp5 = mul i4 %tmp4, %x + %tmp6 = mul i4 %tmp5, %x + %tmp7 = mul i4 %tmp6, %x + %tmp8 = mul i4 %tmp7, %x + %tmp9 = mul i4 %tmp8, %x + ret i4 %tmp9 +} + +define i4 @foo4x11(i4 %x) { +; Can be done with four multiplies. +; CHECK: @foo4x11 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i4 %x, %x + %tmp2 = mul i4 %tmp1, %x + %tmp3 = mul i4 %tmp2, %x + %tmp4 = mul i4 %tmp3, %x + %tmp5 = mul i4 %tmp4, %x + %tmp6 = mul i4 %tmp5, %x + %tmp7 = mul i4 %tmp6, %x + %tmp8 = mul i4 %tmp7, %x + %tmp9 = mul i4 %tmp8, %x + %tmp10 = mul i4 %tmp9, %x + ret i4 %tmp10 +} + +define i4 @foo4x12(i4 %x) { +; Can be done with two multiplies. +; CHECK: @foo4x12 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i4 %x, %x + %tmp2 = mul i4 %tmp1, %x + %tmp3 = mul i4 %tmp2, %x + %tmp4 = mul i4 %tmp3, %x + %tmp5 = mul i4 %tmp4, %x + %tmp6 = mul i4 %tmp5, %x + %tmp7 = mul i4 %tmp6, %x + %tmp8 = mul i4 %tmp7, %x + %tmp9 = mul i4 %tmp8, %x + %tmp10 = mul i4 %tmp9, %x + %tmp11 = mul i4 %tmp10, %x + ret i4 %tmp11 +} + +define i4 @foo4x13(i4 %x) { +; Can be done with three multiplies. +; CHECK: @foo4x13 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i4 %x, %x + %tmp2 = mul i4 %tmp1, %x + %tmp3 = mul i4 %tmp2, %x + %tmp4 = mul i4 %tmp3, %x + %tmp5 = mul i4 %tmp4, %x + %tmp6 = mul i4 %tmp5, %x + %tmp7 = mul i4 %tmp6, %x + %tmp8 = mul i4 %tmp7, %x + %tmp9 = mul i4 %tmp8, %x + %tmp10 = mul i4 %tmp9, %x + %tmp11 = mul i4 %tmp10, %x + %tmp12 = mul i4 %tmp11, %x + ret i4 %tmp12 +} + +define i4 @foo4x14(i4 %x) { +; Can be done with three multiplies. +; CHECK: @foo4x14 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i4 %x, %x + %tmp2 = mul i4 %tmp1, %x + %tmp3 = mul i4 %tmp2, %x + %tmp4 = mul i4 %tmp3, %x + %tmp5 = mul i4 %tmp4, %x + %tmp6 = mul i4 %tmp5, %x + %tmp7 = mul i4 %tmp6, %x + %tmp8 = mul i4 %tmp7, %x + %tmp9 = mul i4 %tmp8, %x + %tmp10 = mul i4 %tmp9, %x + %tmp11 = mul i4 %tmp10, %x + %tmp12 = mul i4 %tmp11, %x + %tmp13 = mul i4 %tmp12, %x + ret i4 %tmp13 +} + +define i4 @foo4x15(i4 %x) { +; Can be done with four multiplies. +; CHECK: @foo4x15 +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: mul +; CHECK-NEXT: ret + %tmp1 = mul i4 %x, %x + %tmp2 = mul i4 %tmp1, %x + %tmp3 = mul i4 %tmp2, %x + %tmp4 = mul i4 %tmp3, %x + %tmp5 = mul i4 %tmp4, %x + %tmp6 = mul i4 %tmp5, %x + %tmp7 = mul i4 %tmp6, %x + %tmp8 = mul i4 %tmp7, %x + %tmp9 = mul i4 %tmp8, %x + %tmp10 = mul i4 %tmp9, %x + %tmp11 = mul i4 %tmp10, %x + %tmp12 = mul i4 %tmp11, %x + %tmp13 = mul i4 %tmp12, %x + %tmp14 = mul i4 %tmp13, %x + ret i4 %tmp14 +} |