diff options
20 files changed, 1423 insertions, 1 deletions
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h index 8fce2f6..ea8cecf 100644 --- a/include/llvm/Analysis/DependenceAnalysis.h +++ b/include/llvm/Analysis/DependenceAnalysis.h @@ -908,6 +908,10 @@ namespace llvm { /// based on the current constraint. void updateDirection(Dependence::DVEntry &Level, const Constraint &CurConstraint) const; + + bool tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, + SmallVectorImpl<Subscript> &Pair) const; + public: static char ID; // Class identification, replacement for typeinfo DependenceAnalysis() : FunctionPass(ID) { diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index 0b4e96f..a5d098e 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -136,6 +136,13 @@ namespace llvm { //===--------------------------------------------------------------------===// // + // createDelinearizationPass - This pass implements attempts to restore + // multidimensional array indices from linearized expressions. + // + FunctionPass *createDelinearizationPass(); + + //===--------------------------------------------------------------------===// + // // Minor pass prototypes, allowing us to expose them through bugpoint and // analyze. FunctionPass *createInstCountPass(); diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index 1fc03be..9cd902a 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -351,8 +351,14 @@ namespace llvm { static inline bool classof(const SCEV *S) { return S->getSCEVType() == scAddRecExpr; } - }; + /// Splits the SCEV into two vectors of SCEVs representing the subscripts + /// and sizes of an array access. Returns the remainder of the + /// delinearization that is the offset start of the array. + const SCEV *delinearize(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) const; + }; //===--------------------------------------------------------------------===// /// SCEVSMaxExpr - This class represents a signed maximum selection. diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 68825a3..c650a6d 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -102,6 +102,7 @@ void initializeDSEPass(PassRegistry&); void initializeDebugIRPass(PassRegistry&); void initializeDeadInstEliminationPass(PassRegistry&); void initializeDeadMachineInstructionElimPass(PassRegistry&); +void initializeDelinearizationPass(PassRegistry &); void initializeDependenceAnalysisPass(PassRegistry&); void initializeDomOnlyPrinterPass(PassRegistry&); void initializeDomOnlyViewerPass(PassRegistry&); diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 8062393..98f2a55 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); initializeDependenceAnalysisPass(Registry); + initializeDelinearizationPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); initializeDomPrinterPass(Registry); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 3d7c0ed..3624aac 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_library(LLVMAnalysis CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp + Delinearization.cpp DependenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp new file mode 100644 index 0000000..98a1ad3 --- /dev/null +++ b/lib/Analysis/Delinearization.cpp @@ -0,0 +1,125 @@ +//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements an analysis pass that tries to delinearize all GEP +// instructions in all loops using the SCEV analysis functionality. This pass is +// only used for testing purposes: if your pass needs delinearization, please +// use the on-demand SCEVAddRecExpr::delinearize() function. +// +//===----------------------------------------------------------------------===// + +#define DL_NAME "delinearize" +#define DEBUG_TYPE DL_NAME +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/IR/Type.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +class Delinearization : public FunctionPass { + Delinearization(const Delinearization &); // do not implement +protected: + Function *F; + LoopInfo *LI; + ScalarEvolution *SE; + +public: + static char ID; // Pass identification, replacement for typeid + + Delinearization() : FunctionPass(ID) { + initializeDelinearizationPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual void print(raw_ostream &O, const Module *M = 0) const; +}; + +void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); +} + +bool Delinearization::runOnFunction(Function &F) { + this->F = &F; + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfo>(); + return false; +} + +static Value *getPointerOperand(Instruction &Inst) { + if (LoadInst *Load = dyn_cast<LoadInst>(&Inst)) + return Load->getPointerOperand(); + else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst)) + return Store->getPointerOperand(); + else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst)) + return Gep->getPointerOperand(); + return NULL; +} + +void Delinearization::print(raw_ostream &O, const Module *) const { + O << "Delinearization on function " << F->getName() << ":\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + Instruction *Inst = &(*I); + if (!isa<StoreInst>(Inst) && !isa<LoadInst>(Inst) && + !isa<GetElementPtrInst>(Inst)) + continue; + + const BasicBlock *BB = Inst->getParent(); + // Delinearize the memory access as analyzed in all the surrounding loops. + // Do not analyze memory accesses outside loops. + for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) { + const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn); + if (!AR) + break; + + O << "AddRec: " << *AR << "\n"; + + SmallVector<const SCEV *, 3> Subscripts, Sizes; + const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes); + int Size = Subscripts.size(); + if (Res == AR || Size == 0) { + O << "failed to delinearize\n"; + continue; + } + O << "Base offset: " << *Res << "\n"; + O << "ArrayDecl[UnknownSize]"; + for (int i = 0; i < Size - 1; i++) + O << "[" << *Sizes[i] << "]"; + O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; + + O << "ArrayRef"; + for (int i = 0; i < Size; i++) + O << "[" << *Subscripts[i] << "]"; + O << "\n"; + } + } +} + +char Delinearization::ID = 0; +static const char delinearization_name[] = "Delinearization"; +INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true, + true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true) + +FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; } diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index a0f1a69..39e4bd2 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -61,6 +61,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" @@ -104,6 +105,10 @@ STATISTIC(BanerjeeApplications, "Banerjee applications"); STATISTIC(BanerjeeIndependence, "Banerjee independence"); STATISTIC(BanerjeeSuccesses, "Banerjee successes"); +static cl::opt<bool> +Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Try to delinearize array references.")); + //===----------------------------------------------------------------------===// // basics @@ -3171,6 +3176,44 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, llvm_unreachable("constraint has unexpected kind"); } +/// Check if we can delinearize the subscripts. If the SCEVs representing the +/// source and destination array references are recurrences on a nested loop, +/// this function flattens the nested recurrences into seperate recurrences +/// for each loop level. +bool +DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, + SmallVectorImpl<Subscript> &Pair) const { + const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); + const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); + if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) + return false; + + SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes; + SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes); + DstAR->delinearize(*SE, DstSubscripts, DstSizes); + + int size = SrcSubscripts.size(); + int dstSize = DstSubscripts.size(); + if (size != dstSize || size < 2) + return false; + +#ifndef NDEBUG + DEBUG(errs() << "\nSrcSubscripts: "); + for (int i = 0; i < size; i++) + DEBUG(errs() << *SrcSubscripts[i]); + DEBUG(errs() << "\nDstSubscripts: "); + for (int i = 0; i < size; i++) + DEBUG(errs() << *DstSubscripts[i]); +#endif + + Pair.resize(size); + for (int i = 0; i < size; ++i) { + Pair[i].Src = SrcSubscripts[i]; + Pair[i].Dst = DstSubscripts[i]; + } + + return true; +} //===----------------------------------------------------------------------===// @@ -3280,6 +3323,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, Pair[0].Dst = DstSCEV; } + if (Delinearize && Pairs == 1 && CommonLevels > 1 && + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); @@ -3698,6 +3747,12 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, Pair[0].Dst = DstSCEV; } + if (Delinearize && Pairs == 1 && CommonLevels > 1 && + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 37be5db..9b2182f 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -6677,7 +6677,478 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, return SE.getCouldNotCompute(); } +static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue().abs(); + APInt B = C2->getValue()->getValue().abs(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + if (ABW > BBW) + B.zext(ABW); + else if (ABW < BBW) + A.zext(BBW); + + return APIntOps::GreatestCommonDivisor(A, B); +} + +static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B.sext(ABW); + else if (ABW < BBW) + A.sext(BBW); + + return APIntOps::srem(A, B); +} + +static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B.sext(ABW); + else if (ABW < BBW) + A.sext(BBW); + + return APIntOps::sdiv(A, B); +} + +namespace { +struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> { +public: + // Pattern match Step into Start. When Step is a multiply expression, find + // the largest subexpression of Step that appears in Start. When Start is an + // add expression, try to match Step in the subexpressions of Start, non + // matching subexpressions are returned under Remainder. + static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step, const SCEV **Remainder) { + assert(Remainder && "Remainder should not be NULL"); + SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0)); + const SCEV *Res = R.visit(Start); + *Remainder = R.Remainder; + return Res; + } + + SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R) + : SE(S), GCD(G), Remainder(R) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant || Constant == Zero) + return GCD; + + if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) { + const SCEV *Res = SE.getConstant(gcd(Constant, CGCD)); + if (Res != One) + return Res; + + Remainder = SE.getConstant(srem(Constant, CGCD)); + Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder)); + Res = SE.getConstant(gcd(Constant, CGCD)); + return Res; + } + + // When GCD is not a constant, it could be that the GCD is an Add, Mul, + // AddRec, etc., in which case we want to find out how many times the + // Constant divides the GCD: we then return that as the new GCD. + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, GCD, Constant, &Rem); + + if (Res == One || Rem != Zero) { + Remainder = Constant; + return One; + } + + assert(isa<SCEVConstant>(Res) && "Res should be a constant"); + Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res))); + return Res; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem); + + // FIXME: There may be ambiguous situations: for instance, + // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m). + // The order in which the AddExpr is traversed computes a different GCD + // and Remainder. + if (Res != One) + GCD = Res; + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + } + + return GCD; + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (Expr->getOperand(i) == GCD) + return GCD; + } + + // If we have not returned yet, it means that GCD is not part of Expr. + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem != Zero) + // GCD does not divide Expr->getOperand(i). + continue; + + if (Res == GCD) + return GCD; + PartialGCD = SE.getMulExpr(PartialGCD, Res); + if (PartialGCD == GCD) + return GCD; + } + + if (PartialGCD != One) + return PartialGCD; + + Remainder = Expr; + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD); + if (!Mul) + return PartialGCD; + + // When the GCD is a multiply expression, try to decompose it: + // this occurs when Step does not divide the Start expression + // as in: {(-4 + (3 * %m)),+,(2 * %m)} + for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem); + if (Rem == Zero) { + Remainder = Rem; + return Res; + } + } + + return PartialGCD; + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return GCD; + + if (!Expr->isAffine()) { + Remainder = Expr; + return GCD; + } + + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem); + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + + Rem = Zero; + Res = findGCD(SE, Expr->getOperand(1), Res, &Rem); + if (Rem != Zero) { + Remainder = Expr; + return GCD; + } + + return Res; + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return One; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Remainder, *Zero, *One; +}; + +struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> { +public: + // Remove from Start all multiples of Step. + static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step) { + SCEVDivision D(SE, Step); + const SCEV *Rem = D.Zero; + (void)Rem; + // The division is guaranteed to succeed: Step should divide Start with no + // remainder. + assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero && + "Step should divide Start with no remainder."); + return D.visit(Start); + } + + SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant) + return One; + + if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) + return SE.getConstant(sdiv(Constant, CGCD)); + return Constant; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return One; + + SmallVector<const SCEV *, 2> Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + + if (Operands.size() == 1) + return Operands[0]; + return SE.getAddExpr(Operands); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return One; + + bool FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + + SmallVector<const SCEV *, 2> Operands; + if (FoundGCDTerm) { + FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (FoundGCDTerm) + Operands.push_back(Expr->getOperand(i)); + else if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + else + Operands.push_back(Expr->getOperand(i)); + } + } else { + FoundGCDTerm = false; + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (PartialGCD == GCD) { + Operands.push_back(Expr->getOperand(i)); + continue; + } + + const SCEV *Rem = Zero; + const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem == Zero) { + PartialGCD = SE.getMulExpr(PartialGCD, Res); + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + } else { + Operands.push_back(Expr->getOperand(i)); + } + } + } + + if (Operands.size() == 1) + return Operands[0]; + return SE.getMulExpr(Operands); + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return One; + + assert(Expr->isAffine() && "Expr should be affine"); + + const SCEV *Start = divide(SE, Expr->getStart(), GCD); + const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD); + + return SE.getAddRecExpr(Start, Step, Expr->getLoop(), + Expr->getNoWrapFlags()); + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return Expr; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Zero, *One; +}; +} + +/// Splits the SCEV into two vectors of SCEVs representing the subscripts and +/// sizes of an array access. Returns the remainder of the delinearization that +/// is the offset start of the array. For example +/// delinearize ({(((-4 + (3 * %m)))),+,(%m)}<%for.i>) { +/// IV: {0,+,1}<%for.i> +/// Start: -4 + (3 * %m) +/// Step: %m +/// SCEVUDiv (Start, Step) = 3 remainder -4 +/// rem = delinearize (3) = 3 +/// Subscripts.push_back(IV + rem) +/// Sizes.push_back(Step) +/// return remainder -4 +/// } +/// When delinearize fails, it returns the SCEV unchanged. +const SCEV * +SCEVAddRecExpr::delinearize(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) const { + if (!this->isAffine()) + return this; + + const SCEV *Start = this->getStart(); + const SCEV *Step = this->getStepRecurrence(SE); + const SCEV *Zero = SE.getConstant(this->getType(), 0); + const SCEV *One = SE.getConstant(this->getType(), 1); + const SCEV *IV = + SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags()); + + DEBUG(dbgs() << "(delinearize: " << *this << "\n"); + + if (Step == One) { + DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n"); + return this; + } + + const SCEV *Remainder = NULL; + const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder); + + DEBUG(dbgs() << "GCD: " << *GCD << "\n"); + DEBUG(dbgs() << "Remainder: " << *Remainder << "\n"); + + if (GCD == One) { + DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n"); + return this; + } + + const SCEV *Quotient = + SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD); + DEBUG(dbgs() << "Quotient: " << *Quotient << "\n"); + + const SCEV *Rem; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient)) + Rem = AR->delinearize(SE, Subscripts, Sizes); + else + Rem = Quotient; + + if (Step != GCD) { + Step = SCEVDivision::divide(SE, Step, GCD); + IV = SE.getMulExpr(IV, Step); + } + const SCEV *Index = SE.getAddExpr(IV, Rem); + + Subscripts.push_back(Index); + Sizes.push_back(GCD); + +#ifndef NDEBUG + int Size = Sizes.size(); + DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n"); + DEBUG(dbgs() << "ArrayDecl[UnknownSize]"); + for (int i = 0; i < Size - 1; i++) + DEBUG(dbgs() << "[" << *Sizes[i] << "]"); + DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n"); + + DEBUG(dbgs() << "ArrayRef"); + for (int i = 0; i < Size; i++) + DEBUG(dbgs() << "[" << *Subscripts[i] << "]"); + DEBUG(dbgs() << "\n)\n"); +#endif + + return Remainder; +} //===----------------------------------------------------------------------===// // SCEVCallbackVH Class Implementation diff --git a/test/Analysis/Delinearization/a.ll b/test/Analysis/Delinearization/a.ll new file mode 100644 index 0000000..9308749 --- /dev/null +++ b/test/Analysis/Delinearization/a.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s +; +; void foo(long n, long m, long o, int A[n][m][o]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[2*i+3][3*j-4][5*k+7] = 1; +; } + +; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(i32) bytes. +; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>] + +; AddRec: {{(8 + ((4 + (12 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(i32) bytes. +; CHECK: ArrayRef[{(1 + (3 * %m)),+,(2 * %m)}<%for.i>][{2,+,(3 * %o)}<%for.j>] + +; AddRec: {(8 + ((-8 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of 2 bytes. +; CHECK: ArrayRef[{((1 + ((-1 + (3 * %m)) * %o)) * sizeof(i32)),+,(%m * %o * sizeof(i32))}<%for.i>] + +; Function Attrs: nounwind uwtable +define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 { +entry: + %cmp32 = icmp sgt i64 %n, 0 + br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end17 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp230 = icmp sgt i64 %m, 0 + %cmp528 = icmp sgt i64 %o, 0 + br i1 %cmp230, label %for.i, label %for.end17 + +for.inc15.us: ; preds = %for.inc12.us.us, %for.i + %inc16.us = add nsw i64 %i.033.us, 1 + %exitcond55 = icmp eq i64 %inc16.us, %n + br i1 %exitcond55, label %for.end17, label %for.i + +for.i: ; preds = %for.cond1.preheader.lr.ph, %for.inc15.us + %i.033.us = phi i64 [ %inc16.us, %for.inc15.us ], [ 0, %for.cond1.preheader.lr.ph ] + %mul8.us = shl i64 %i.033.us, 1 + %add9.us = add nsw i64 %mul8.us, 3 + %0 = mul i64 %add9.us, %m + %sub.us = add i64 %0, -4 + br i1 %cmp528, label %for.j, label %for.inc15.us + +for.inc12.us.us: ; preds = %for.k + %inc13.us.us = add nsw i64 %j.031.us.us, 1 + %exitcond54 = icmp eq i64 %inc13.us.us, %m + br i1 %exitcond54, label %for.inc15.us, label %for.j + +for.j: ; preds = %for.i, %for.inc12.us.us + %j.031.us.us = phi i64 [ %inc13.us.us, %for.inc12.us.us ], [ 0, %for.i ] + %mul7.us.us = mul nsw i64 %j.031.us.us, 3 + %tmp.us.us = add i64 %sub.us, %mul7.us.us + %tmp27.us.us = mul i64 %tmp.us.us, %o + br label %for.k + +for.k: ; preds = %for.k, %for.j + %k.029.us.us = phi i64 [ 0, %for.j ], [ %inc.us.us, %for.k ] + %mul.us.us = mul nsw i64 %k.029.us.us, 5 + %arrayidx.sum.us.us = add i64 %mul.us.us, 7 + %arrayidx10.sum.us.us = add i64 %arrayidx.sum.us.us, %tmp27.us.us + %arrayidx11.us.us = getelementptr inbounds i32* %A, i64 %arrayidx10.sum.us.us + store i32 1, i32* %arrayidx11.us.us, align 4 + %inc.us.us = add nsw i64 %k.029.us.us, 1 + %exitcond = icmp eq i64 %inc.us.us, %o + br i1 %exitcond, label %for.inc12.us.us, label %for.k + +for.end17: ; preds = %for.inc15.us, %for.cond1.preheader.lr.ph, %entry + ret void +} diff --git a/test/Analysis/Delinearization/himeno_1.ll b/test/Analysis/Delinearization/himeno_1.ll new file mode 100644 index 0000000..9458bd2 --- /dev/null +++ b/test/Analysis/Delinearization/himeno_1.ll @@ -0,0 +1,102 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; #define MR(mt,n,r,c,d) mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)] +; +; struct Mat { +; float* m; +; int mnums; +; int mrows; +; int mcols; +; int mdeps; +; }; +; +; typedef struct Mat Matrix; +; +; void jacobi(int nn, Matrix* a, Matrix* p) +; { +; long i, j, k, max,jmax,kmax; +; +; p_rows_sub = p->mrows - 1; +; p_cols_sub = p->mcols - 1; +; p_deps_sub = p->mdeps - 1; +; +; for(i = 1; i < p_rows_sub; i++) +; for(j = 1; j < p_cols_sub; j++) +; for(k = 1; k < p_deps_sub; k++) +; MR(a,0,i,j,k) = i + j + k; +; } + +; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>] + +; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>] + +; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>] + +%struct.Mat = type { float*, i32, i32, i32, i32 } + +define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable { +entry: + %p.rows.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 2 + %p.rows = load i32* %p.rows.ptr + %p.rows.sub = add i32 %p.rows, -1 + %p.rows.sext = sext i32 %p.rows.sub to i64 + %p.cols.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 3 + %p.cols = load i32* %p.cols.ptr + %p.cols.sub = add i32 %p.cols, -1 + %p.cols.sext = sext i32 %p.cols.sub to i64 + %p.deps.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 4 + %p.deps = load i32* %p.deps.ptr + %p.deps.sub = add i32 %p.deps, -1 + %p.deps.sext = sext i32 %p.deps.sub to i64 + %a.cols.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 3 + %a.cols = load i32* %a.cols.ptr + %a.deps.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 4 + %a.deps = load i32* %a.deps.ptr + %a.base.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 0 + %a.base = load float** %a.base.ptr, align 8 + br label %for.i + +for.i: ; preds = %for.i.inc, %entry + %i = phi i64 [ %i.inc, %for.i.inc ], [ 1, %entry ] + br label %for.j + +for.j: ; preds = %for.j.inc, %for.i + %j = phi i64 [ %j.inc, %for.j.inc ], [ 1, %for.i ] + %a.cols.sext = sext i32 %a.cols to i64 + %a.deps.sext = sext i32 %a.deps to i64 + br label %for.k + +for.k: ; preds = %for.k, %for.j + %k = phi i64 [ 1, %for.j ], [ %k.inc, %for.k ] + %tmp1 = mul nsw i64 %a.cols.sext, %i + %tmp2 = add i64 %tmp1, %j + %tmp3 = mul i64 %tmp2, %a.deps.sext + %tmp4 = add nsw i64 %k, %tmp3 + %arrayidx = getelementptr inbounds float* %a.base, i64 %tmp4 + store float 1.000000e+00, float* %arrayidx + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %p.deps.sext + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: ; preds = %for.k + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %p.cols.sext + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: ; preds = %for.j.inc + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %p.rows.sext + br i1 %i.exitcond, label %end, label %for.i + +end: ; preds = %for.i.inc + ret void +} diff --git a/test/Analysis/Delinearization/himeno_2.ll b/test/Analysis/Delinearization/himeno_2.ll new file mode 100644 index 0000000..a290066 --- /dev/null +++ b/test/Analysis/Delinearization/himeno_2.ll @@ -0,0 +1,102 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; #define MR(mt,n,r,c,d) mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)] +; +; struct Mat { +; float* m; +; int mnums; +; int mrows; +; int mcols; +; int mdeps; +; }; +; +; typedef struct Mat Matrix; +; +; void jacobi(int nn, Matrix* a, Matrix* p) +; { +; long i, j, k, max,jmax,kmax; +; +; p_rows_sub = p->mrows - 1; +; p_cols_sub = p->mcols - 1; +; p_deps_sub = p->mdeps - 1; +; +; for(i = 1; i < p_rows_sub; i++) +; for(j = 1; j < p_cols_sub; j++) +; for(k = 1; k < p_deps_sub; k++) +; MR(a,0,i,j,k) = i + j + k; +; } + +; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>] + +; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>] + +; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>] + +%struct.Mat = type { float*, i32, i32, i32, i32 } + +define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable { +entry: + %p.rows.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 2 + %p.rows = load i32* %p.rows.ptr + %p.rows.sub = add i32 %p.rows, -1 + %p.rows.sext = sext i32 %p.rows.sub to i64 + %p.cols.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 3 + %p.cols = load i32* %p.cols.ptr + %p.cols.sub = add i32 %p.cols, -1 + %p.cols.sext = sext i32 %p.cols.sub to i64 + %p.deps.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 4 + %p.deps = load i32* %p.deps.ptr + %p.deps.sub = add i32 %p.deps, -1 + %p.deps.sext = sext i32 %p.deps.sub to i64 + %a.cols.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 3 + %a.cols = load i32* %a.cols.ptr + %a.cols.sext = sext i32 %a.cols to i64 + %a.deps.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 4 + %a.deps = load i32* %a.deps.ptr + %a.deps.sext = sext i32 %a.deps to i64 + %a.base.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 0 + %a.base = load float** %a.base.ptr, align 8 + br label %for.i + +for.i: ; preds = %for.i.inc, %entry + %i = phi i64 [ %i.inc, %for.i.inc ], [ 1, %entry ] + br label %for.j + +for.j: ; preds = %for.j.inc, %for.i + %j = phi i64 [ %j.inc, %for.j.inc ], [ 1, %for.i ] + br label %for.k + +for.k: ; preds = %for.k, %for.j + %k = phi i64 [ 1, %for.j ], [ %k.inc, %for.k ] + %tmp1 = mul nsw i64 %a.cols.sext, %i + %tmp2 = add i64 %tmp1, %j + %tmp3 = mul i64 %tmp2, %a.deps.sext + %tmp4 = add nsw i64 %k, %tmp3 + %arrayidx = getelementptr inbounds float* %a.base, i64 %tmp4 + store float 1.000000e+00, float* %arrayidx + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %p.deps.sext + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: ; preds = %for.k + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %p.cols.sext + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: ; preds = %for.j.inc + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %p.rows.sext + br i1 %i.exitcond, label %end, label %for.i + +end: ; preds = %for.i.inc + ret void +} diff --git a/test/Analysis/Delinearization/lit.local.cfg b/test/Analysis/Delinearization/lit.local.cfg new file mode 100644 index 0000000..19eebc0 --- /dev/null +++ b/test/Analysis/Delinearization/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll new file mode 100644 index 0000000..82cab16 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; void foo(long n, long m, long o, double A[n][m][o]) { +; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i+3][j-4][k+7] = 1.0; +; } + +; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nw><%for.k>] + +; AddRec: {{(48 + ((-24 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-3 + (3 * %m)),+,%m}<%for.i>][{6,+,%o}<%for.j>] + +; AddRec: {(48 + ((-32 + (32 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(6 + ((-4 + (4 * %m)) * %o)),+,(%m * %o)}<%for.i>] + +define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ] + br label %for.k + +for.k: + %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ] + %offset0 = add nsw i64 %i, 3 + %subscript0 = mul i64 %offset0, %m + %offset1 = add nsw i64 %j, -4 + %subscript1 = add i64 %offset1, %subscript0 + %subscript2 = mul i64 %subscript1, %o + %offset2 = add nsw i64 %k, 7 + %subscript = add i64 %subscript2, %offset2 + %idx = getelementptr inbounds double* %A, i64 %subscript + store double 1.0, double* %idx + br label %for.k.inc + +for.k.inc: + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll new file mode 100644 index 0000000..a1e779f --- /dev/null +++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; void foo(long n, long m, long o, long p, double A[n][m][o+p]) { +; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i+3][j-4][k+7] = 1.0; +; } + +; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{3,+,1}<nw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>] + +; AddRec: {{(48 + (8 * %o) + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][(%o + %p)] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-4 + (3 * %m)),+,%m}<%for.cond4.preheader.lr.ph.us>][{(6 + %o),+,(%o + %p)}<%for.body6.lr.ph.us.us>] + +; AddRec: {(48 + (8 * %o) + ((-40 + (32 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(6 + ((-5 + (4 * %m)) * (%o + %p)) + %o),+,((%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>] + +define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable { +entry: + %add = add nsw i64 %p, %o + %cmp22 = icmp sgt i64 %n, 0 + br i1 %cmp22, label %for.cond1.preheader.lr.ph, label %for.end16 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp220 = icmp sgt i64 %m, 0 + %cmp518 = icmp sgt i64 %o, 0 + br i1 %cmp220, label %for.cond4.preheader.lr.ph.us, label %for.end16 + +for.inc14.us: ; preds = %for.cond4.preheader.lr.ph.us, %for.inc11.us.us + %inc15.us = add nsw i64 %i.023.us, 1 + %exitcond43 = icmp eq i64 %inc15.us, %n + br i1 %exitcond43, label %for.end16, label %for.cond4.preheader.lr.ph.us + +for.cond4.preheader.lr.ph.us: ; preds = %for.inc14.us, %for.cond1.preheader.lr.ph + %i.023.us = phi i64 [ %inc15.us, %for.inc14.us ], [ 0, %for.cond1.preheader.lr.ph ] + %add8.us = add nsw i64 %i.023.us, 3 + %0 = mul i64 %add8.us, %m + %sub.us = add i64 %0, -4 + br i1 %cmp518, label %for.body6.lr.ph.us.us, label %for.inc14.us + +for.inc11.us.us: ; preds = %for.body6.us.us + %inc12.us.us = add nsw i64 %j.021.us.us, 1 + %exitcond42 = icmp eq i64 %inc12.us.us, %m + br i1 %exitcond42, label %for.inc14.us, label %for.body6.lr.ph.us.us + +for.body6.lr.ph.us.us: ; preds = %for.cond4.preheader.lr.ph.us, %for.inc11.us.us + %j.021.us.us = phi i64 [ %inc12.us.us, %for.inc11.us.us ], [ 0, %for.cond4.preheader.lr.ph.us ] + %tmp.us.us = add i64 %sub.us, %j.021.us.us + %tmp17.us.us = mul i64 %tmp.us.us, %add + br label %for.body6.us.us + +for.body6.us.us: ; preds = %for.body6.us.us, %for.body6.lr.ph.us.us + %k.019.us.us = phi i64 [ 0, %for.body6.lr.ph.us.us ], [ %inc.us.us, %for.body6.us.us ] + %arrayidx.sum.us.us = add i64 %k.019.us.us, 7 + %arrayidx9.sum.us.us = add i64 %arrayidx.sum.us.us, %tmp17.us.us + %arrayidx10.us.us = getelementptr inbounds double* %A, i64 %arrayidx9.sum.us.us + store double 1.000000e+00, double* %arrayidx10.us.us, align 8 + %inc.us.us = add nsw i64 %k.019.us.us, 1 + %exitcond = icmp eq i64 %inc.us.us, %o + br i1 %exitcond, label %for.inc11.us.us, label %for.body6.us.us + +for.end16: ; preds = %for.cond1.preheader.lr.ph, %for.inc14.us, %entry + ret void +} diff --git a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll new file mode 100644 index 0000000..a52a4c9 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; void foo(long n, long m, long o, double A[n][m][o], long p, long q, long r) { +; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i+p][j+q][k+r] = 1.0; +; } + +; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nw><%for.k>] + +; AddRec: {{(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(1 + (%m * %p) + %q),+,%m}<%for.i>][{(-1 + %r),+,%o}<%for.j>] + +; AddRec: {(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-1 + ((((1 + %p) * %m) + %q) * %o) + %r),+,(%m * %o)}<%for.i>] + +define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ] + br label %for.k + +for.k: + %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ] + %offset0 = add nsw i64 %i, %p + %subscript0 = mul i64 %offset0, %m + %offset1 = add nsw i64 %j, %q + %subscript1 = add i64 %offset1, %subscript0 + %subscript2 = mul i64 %subscript1, %o + %offset2 = add nsw i64 %k, %r + %subscript = add i64 %subscript2, %offset2 + %idx = getelementptr inbounds double* %A, i64 %subscript + store double 1.0, double* %idx + br label %for.k.inc + +for.k.inc: + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll new file mode 100644 index 0000000..d68a158 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll @@ -0,0 +1,46 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; Derived from the following code: +; +; void foo(long n, long m, double A[n][m]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; A[i][j] = 1.0; +; } + +; AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>] + +; AddRec: {(-8 + (8 * %m) + %A),+,(8 * %m)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-1 + %m),+,%m}<%for.i>] + +define void @foo(i64 %n, i64 %m, double* %A) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + %tmp = mul nsw i64 %i, %m + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ] + %vlaarrayidx.sum = add i64 %j, %tmp + %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum + store double 1.0, double* %arrayidx + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll new file mode 100644 index 0000000..7207420 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; extern void bar(long n, long m, double A[n][m]); +; +; void foo(long a, long b) { +; for (long n = 1; n < a; ++n) +; for (long m = 1; m < b; ++m) { +; double A[n][m]; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; A[i][j] = 1.0; +; bar(n, m, A); +; } +; } + +; AddRec: {{%vla.us,+,{8,+,8}<%for.cond7.preheader.lr.ph.split.us.us>}<%for.body9.lr.ph.us.us>,+,8}<%for.body9.us.us> +; CHECK: Base offset: %vla.us +; CHECK: ArrayDecl[UnknownSize][{1,+,1}<%for.cond7.preheader.lr.ph.split.us.us>] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.body9.lr.ph.us.us>][{0,+,1}<nuw><nsw><%for.body9.us.us>] + +define void @foo(i64 %a, i64 %b) nounwind uwtable { +entry: + %cmp43 = icmp sgt i64 %a, 1 + br i1 %cmp43, label %for.cond1.preheader.lr.ph, label %for.end19 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp224 = icmp sgt i64 %b, 1 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc17, %for.cond1.preheader.lr.ph + %indvars.iv51 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next52, %for.inc17 ] + br i1 %cmp224, label %for.cond7.preheader.lr.ph.split.us.us, label %for.inc17 + +for.end13.us: ; preds = %for.inc11.us.us + call void @bar(i64 %indvars.iv51, i64 %indvars.iv48, double* %vla.us) nounwind + call void @llvm.stackrestore(i8* %1) + %indvars.iv.next49 = add i64 %indvars.iv48, 1 + %exitcond54 = icmp eq i64 %indvars.iv.next49, %b + br i1 %exitcond54, label %for.inc17, label %for.cond7.preheader.lr.ph.split.us.us + +for.inc11.us.us: ; preds = %for.body9.us.us + %inc12.us.us = add nsw i64 %i.023.us.us, 1 + %exitcond53 = icmp eq i64 %inc12.us.us, %indvars.iv51 + br i1 %exitcond53, label %for.end13.us, label %for.body9.lr.ph.us.us + +for.body9.lr.ph.us.us: ; preds = %for.cond7.preheader.lr.ph.split.us.us, %for.inc11.us.us + %i.023.us.us = phi i64 [ 0, %for.cond7.preheader.lr.ph.split.us.us ], [ %inc12.us.us, %for.inc11.us.us ] + %0 = mul nsw i64 %i.023.us.us, %indvars.iv48 + br label %for.body9.us.us + +for.body9.us.us: ; preds = %for.body9.us.us, %for.body9.lr.ph.us.us + %j.021.us.us = phi i64 [ 0, %for.body9.lr.ph.us.us ], [ %inc.us.us, %for.body9.us.us ] + %arrayidx.sum.us.us = add i64 %j.021.us.us, %0 + %arrayidx10.us.us = getelementptr inbounds double* %vla.us, i64 %arrayidx.sum.us.us + store double 1.000000e+00, double* %arrayidx10.us.us, align 8 + %inc.us.us = add nsw i64 %j.021.us.us, 1 + %exitcond50 = icmp eq i64 %inc.us.us, %indvars.iv48 + br i1 %exitcond50, label %for.inc11.us.us, label %for.body9.us.us + +for.cond7.preheader.lr.ph.split.us.us: ; preds = %for.cond1.preheader, %for.end13.us + %indvars.iv48 = phi i64 [ %indvars.iv.next49, %for.end13.us ], [ 1, %for.cond1.preheader ] + %1 = call i8* @llvm.stacksave() + %2 = mul nuw i64 %indvars.iv48, %indvars.iv51 + %vla.us = alloca double, i64 %2, align 16 + br label %for.body9.lr.ph.us.us + +for.inc17: ; preds = %for.end13.us, %for.cond1.preheader + %indvars.iv.next52 = add i64 %indvars.iv51, 1 + %exitcond55 = icmp eq i64 %indvars.iv.next52, %a + br i1 %exitcond55, label %for.end19, label %for.cond1.preheader + +for.end19: ; preds = %for.inc17, %entry + ret void +} + +declare i8* @llvm.stacksave() nounwind +declare void @bar(i64, i64, double*) +declare void @llvm.stackrestore(i8*) nounwind diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll new file mode 100644 index 0000000..24f9583 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; void foo(long n, long m, long o, double A[n][m][o]) { +; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i][j][k] = 1.0; +; } + +; AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>] + +; AddRec: {{(-8 + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][(%m * %o)] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{(-1 + %o),+,%o}<%for.j>] + +; AddRec: {(-8 + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-1 + (%m * %o)),+,(%m * %o)}<%for.i>] + +define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ] + br label %for.k + +for.k: + %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ] + %subscript0 = mul i64 %i, %m + %subscript1 = add i64 %j, %subscript0 + %subscript2 = mul i64 %subscript1, %o + %subscript = add i64 %subscript2, %k + %idx = getelementptr inbounds double* %A, i64 %subscript + store double 1.0, double* %idx + br label %for.k.inc + +for.k.inc: + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll new file mode 100644 index 0000000..e151610 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s +; void foo(int n, int m, int o, double A[n][m][o]) { +; +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; for (int k = 0; k < o; k++) +; A[i][j][k] = 1.0; +; } + +; AddRec: {{{%A,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>,+,8}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes. +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] + +; AddRec: {{((8 * (zext i32 (-1 + %o) to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][((zext i32 %m to i64) * (zext i32 %o to i64))] with elements of 8 bytes. +; CHECK: ArrayRef[{0,+,1}<%for.i>][{(zext i32 (-1 + %o) to i64),+,(zext i32 %o to i64)}<%for.j>] + +; AddRec: {((8 * (zext i32 (-1 + %o) to i64)) + (8 * (zext i32 (-1 + %m) to i64) * (zext i32 %o to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of 8 bytes. +; CHECK: ArrayRef[{((zext i32 (-1 + %o) to i64) + ((zext i32 (-1 + %m) to i64) * (zext i32 %o to i64))),+,((zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>] + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i32 %n, i32 %m, i32 %o, double* %A) { +entry: + %m_zext = zext i32 %m to i64 + %n_zext = zext i32 %o to i64 + br label %for.i + +for.i: + %i = phi i64 [ %i.inc, %for.i.inc ], [ 0, %entry ] + br label %for.j + +for.j: + %j = phi i64 [ %j.inc, %for.j.inc ], [ 0, %for.i ] + br label %for.k + +for.k: + %k = phi i64 [ %k.inc, %for.k.inc ], [ 0, %for.j ] + %tmp = mul i64 %i, %m_zext + %tmp1 = trunc i64 %j to i32 + %tmp2 = trunc i64 %i to i32 + %mul.us.us = mul nsw i32 %tmp1, %tmp2 + %tmp.us.us = add i64 %j, %tmp + %tmp17.us.us = mul i64 %tmp.us.us, %n_zext + %subscript = add i64 %tmp17.us.us, %k + %idx = getelementptr inbounds double* %A, i64 %subscript + store double 1.0, double* %idx + br label %for.k.inc + +for.k.inc: + %k.inc = add i64 %k, 1 + %k.inc.trunc = trunc i64 %k.inc to i32 + %k.exitcond = icmp eq i32 %k.inc.trunc, %o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add i64 %j, 1 + %j.inc.trunc = trunc i64 %j.inc to i32 + %j.exitcond = icmp eq i32 %j.inc.trunc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add i64 %i, 1 + %i.inc.trunc = trunc i64 %i.inc to i32 + %i.exitcond = icmp eq i32 %i.inc.trunc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} |