diff options
-rw-r--r-- | include/llvm/Analysis/Passes.h | 8 | ||||
-rw-r--r-- | include/llvm/Analysis/ProfileDataLoader.h | 148 | ||||
-rw-r--r-- | include/llvm/InitializePasses.h | 1 | ||||
-rw-r--r-- | include/llvm/LinkAllPasses.h | 1 | ||||
-rw-r--r-- | lib/Analysis/Analysis.cpp | 1 | ||||
-rw-r--r-- | lib/Analysis/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/Analysis/ProfileDataLoader.cpp | 186 | ||||
-rw-r--r-- | lib/Analysis/ProfileDataLoaderPass.cpp | 188 | ||||
-rw-r--r-- | lib/Analysis/ProfileInfo.cpp | 26 | ||||
-rw-r--r-- | test/Analysis/Profiling/load-branch-weights-ifs.ll | 119 | ||||
-rw-r--r-- | test/Analysis/Profiling/load-branch-weights-loops.ll | 185 | ||||
-rw-r--r-- | test/Analysis/Profiling/load-branch-weights-switches.ll | 162 |
12 files changed, 1001 insertions, 26 deletions
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index a22bd12..c52f846 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -103,6 +103,14 @@ namespace llvm { //===--------------------------------------------------------------------===// // + // createProfileMetadataLoaderPass - This pass loads information from a + // profile dump file and sets branch weight metadata. + // + ModulePass *createProfileMetadataLoaderPass(); + extern char &ProfileMetadataLoaderPassID; + + //===--------------------------------------------------------------------===// + // // createNoProfileInfoPass - This pass implements the default "no profile". // ImmutablePass *createNoProfileInfoPass(); diff --git a/include/llvm/Analysis/ProfileDataLoader.h b/include/llvm/Analysis/ProfileDataLoader.h new file mode 100644 index 0000000..3d15bda --- /dev/null +++ b/include/llvm/Analysis/ProfileDataLoader.h @@ -0,0 +1,148 @@ +//===- ProfileDataLoader.h - Load & convert profile info ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileDataLoader class is used to load profiling data from a dump file. +// The ProfileDataT<FType, BType> class is used to store the mapping of this +// data to control flow edges. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PROFILEDATALOADER_H +#define LLVM_ANALYSIS_PROFILEDATALOADER_H + +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include <vector> +#include <string> +#include <map> + +namespace llvm { + +class ModulePass; +class Function; +class BasicBlock; + +// Helpers for dumping edges to dbgs(). +raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, + const BasicBlock *> E); +raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB); +raw_ostream& operator<<(raw_ostream &O, const Function *F); + +/// \brief The ProfileDataT<FType, BType> class is used to store the mapping of +/// profiling data to control flow edges. +/// +/// An edge is defined by its source and sink basic blocks. +template<class FType, class BType> +class ProfileDataT { + public: + // The profiling information defines an Edge by its source and sink basic + // blocks. + typedef std::pair<const BType*, const BType*> Edge; + + private: + typedef std::map<Edge, unsigned> EdgeWeights; + + /// \brief Count the number of times a transition between two blocks is + /// executed. + /// + /// As a special case, we also hold an edge from the null BasicBlock to the + /// entry block to indicate how many times the function was entered. + std::map<const FType*, EdgeWeights> EdgeInformation; + + public: + static char ID; // Class identification, replacement for typeinfo + ProfileDataT() {}; + ~ProfileDataT() {}; + + /// getFunction() - Returns the Function for an Edge. + static const FType* getFunction(Edge e) { + // e.first may be NULL + assert( ((!e.first) || (e.first->getParent() == e.second->getParent())) + && "A ProfileData::Edge can not be between two functions"); + assert(e.second && "A ProfileData::Edge must have a real sink"); + return e.second->getParent(); + } + + /// getEdge() - Creates an Edge between two BasicBlocks. + static Edge getEdge(const BType *Src, const BType *Dest) { + return std::make_pair(Src, Dest); + } + + /// getEdgeWeight - Return the number of times that a given edge was + /// executed. + unsigned getEdgeWeight(Edge e) const { + const FType *f = getFunction(e); + assert( (EdgeInformation.find(f) != EdgeInformation.end()) + && "No profiling information for function"); + EdgeWeights weights = EdgeInformation.find(f)->second; + + assert( (weights.find(e) != weights.end()) + && "No profiling information for edge"); + return weights.find(e)->second; + } + + /// addEdgeWeight - Add 'weight' to the already stored execution count for + /// this edge. + void addEdgeWeight(Edge e, unsigned weight) { + EdgeInformation[getFunction(e)][e] += weight; + } +}; + +typedef ProfileDataT<Function, BasicBlock> ProfileData; +//typedef ProfileDataT<MachineFunction, MachineBasicBlock> MachineProfileData; + +/// The ProfileDataLoader class is used to load raw profiling data from the +/// dump file. +class ProfileDataLoader { +private: + /// The name of the file where the raw profiling data is stored. + const std::string &Filename; + + /// A vector of the command line arguments used when the target program was + /// run to generate profiling data. One entry per program run. + std::vector<std::string> CommandLines; + + /// The raw values for how many times each edge was traversed, values from + /// multiple program runs are accumulated. + std::vector<unsigned> EdgeCounts; + +public: + /// ProfileDataLoader ctor - Read the specified profiling data file, exiting + /// the program if the file is invalid or broken. + ProfileDataLoader(const char *ToolName, const std::string &Filename); + + /// A special value used to represent the weight of an edge which has not + /// been counted yet. + static const unsigned Uncounted; + + /// The maximum value that can be stored in a profiling counter. + static const unsigned MaxCount; + + /// getNumExecutions - Return the number of times the target program was run + /// to generate this profiling data. + unsigned getNumExecutions() const { return CommandLines.size(); } + + /// getExecution - Return the command line parameters used to generate the + /// i'th set of profiling data. + const std::string& getExecution(unsigned i) const { return CommandLines[i]; } + + const std::string& getFileName() const { return Filename; } + + /// getRawEdgeCounts - Return the raw profiling data, this is just a list of + /// numbers with no mappings to edges. + const std::vector<unsigned>& getRawEdgeCounts() const { return EdgeCounts; } +}; + +/// createProfileMetadataLoaderPass - This function returns a Pass that loads +/// the profiling information for the module from the specified filename. +ModulePass *createProfileMetadataLoaderPass(const std::string &Filename); + +} // End llvm namespace + +#endif diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index de97957..9943113 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -141,6 +141,7 @@ void initializeLiveRegMatrixPass(PassRegistry&); void initializeLiveStacksPass(PassRegistry&); void initializeLiveVariablesPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); +void initializeProfileMetadataLoaderPassPass(PassRegistry&); void initializePathProfileLoaderPassPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); void initializeLoopDeletionPass(PassRegistry&); diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 697c94c..fe4c92a 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -107,6 +107,7 @@ namespace { (void) llvm::createProfileVerifierPass(); (void) llvm::createPathProfileVerifierPass(); (void) llvm::createProfileLoaderPass(); + (void) llvm::createProfileMetadataLoaderPass(); (void) llvm::createPathProfileLoaderPass(); (void) llvm::createPromoteMemoryToRegisterPass(); (void) llvm::createDemoteRegisterToMemoryPass(); diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 0ba6af9..87a75fd 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -61,6 +61,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializePathProfileLoaderPassPass(Registry); initializeProfileVerifierPassPass(Registry); initializePathProfileVerifierPass(Registry); + initializeProfileMetadataLoaderPassPass(Registry); initializeRegionInfoPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 96e68b4..e461848 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -44,6 +44,8 @@ add_llvm_library(LLVMAnalysis ProfileInfoLoader.cpp ProfileInfoLoaderPass.cpp ProfileVerifierPass.cpp + ProfileDataLoader.cpp + ProfileDataLoaderPass.cpp RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp new file mode 100644 index 0000000..0006492 --- /dev/null +++ b/lib/Analysis/ProfileDataLoader.cpp @@ -0,0 +1,186 @@ +//===- ProfileDataLoader.cpp - Load profile information from disk ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileDataLoader class is used to load raw profiling data from the dump +// file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Module.h" +#include "llvm/InstrTypes.h" +#include "llvm/Analysis/ProfileDataLoader.h" +#include "llvm/Analysis/ProfileDataTypes.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdio> +#include <cstdlib> +using namespace llvm; + +namespace llvm { + +template<> +char ProfileDataT<Function,BasicBlock>::ID = 0; + +raw_ostream& operator<<(raw_ostream &O, const Function *F) { + return O << F->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { + return O << BB->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +} // namespace llvm + +/// ByteSwap - Byteswap 'Var' if 'Really' is true. Required when the compiler +/// host and target have different endianness. +static inline unsigned ByteSwap(unsigned Var, bool Really) { + if (!Really) return Var; + return ((Var & (255U<< 0U)) << 24U) | + ((Var & (255U<< 8U)) << 8U) | + ((Var & (255U<<16U)) >> 8U) | + ((Var & (255U<<24U)) >> 24U); +} + +/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one +/// (or both) may not be defined. +static unsigned AddCounts(unsigned A, unsigned B) { + // If either value is undefined, use the other. + // Undefined + undefined = undefined. + if (A == ProfileDataLoader::Uncounted) return B; + if (B == ProfileDataLoader::Uncounted) return A; + + // Saturate to the maximum storable value. This could change taken/nottaken + // ratios, but is presumably better than wrapping and thus potentially + // inverting ratios. + unsigned long long tmp = (unsigned long long)A + (unsigned long long)B; + if (tmp > (unsigned long long)ProfileDataLoader::MaxCount) + tmp = ProfileDataLoader::MaxCount; + return (unsigned)tmp; +} + +/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F' +template <typename T> +static void ReadProfilingData(const char *ToolName, FILE *F, + std::vector<T> &Data, size_t NumEntries) { + // Read in the block of data... + if (fread(&Data[0], sizeof(T), NumEntries, F) != NumEntries) { + errs() << ToolName << ": profiling data truncated!\n"; + perror(0); + exit(1); + } +} + +/// ReadProfilingNumEntries - Read how many entries are in this profiling data +/// packet. +static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F, + bool ShouldByteSwap) { + std::vector<unsigned> NumEntries(1); + ReadProfilingData<unsigned>(ToolName, F, NumEntries, 1); + return ByteSwap(NumEntries[0], ShouldByteSwap); +} + +/// ReadProfilingBlock - Read the number of entries in the next profiling data +/// packet and then accumulate the entries into 'Data'. +static void ReadProfilingBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + std::vector<unsigned> &Data) { + // Read the number of entries... + unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); + + // Read in the data. + std::vector<unsigned> TempSpace(NumEntries); + ReadProfilingData<unsigned>(ToolName, F, TempSpace, (size_t)NumEntries); + + // Make sure we have enough space ... + if (Data.size() < NumEntries) + Data.resize(NumEntries, ProfileDataLoader::Uncounted); + + // Accumulate the data we just read into the existing data. + for (unsigned i = 0; i < NumEntries; ++i) { + Data[i] = AddCounts(ByteSwap(TempSpace[i], ShouldByteSwap), Data[i]); + } +} + +/// ReadProfilingArgBlock - Read the command line arguments that the progam was +/// run with when the current profiling data packet(s) were generated. +static void ReadProfilingArgBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + std::vector<std::string> &CommandLines) { + // Read the number of bytes ... + unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); + + // Read in the arguments (if there are any to read). Round up the length to + // the nearest 4-byte multiple. + std::vector<char> Args(ArgLength+4); + if (ArgLength) + ReadProfilingData<char>(ToolName, F, Args, (ArgLength+3) & ~3); + + // Store the arguments. + CommandLines.push_back(std::string(&Args[0], &Args[ArgLength])); +} + +const unsigned ProfileDataLoader::Uncounted = ~0U; +const unsigned ProfileDataLoader::MaxCount = ~0U - 1U; + +/// ProfileDataLoader ctor - Read the specified profiling data file, exiting +/// the program if the file is invalid or broken. +ProfileDataLoader::ProfileDataLoader(const char *ToolName, + const std::string &Filename) + : Filename(Filename) { + FILE *F = fopen(Filename.c_str(), "rb"); + if (F == 0) { + errs() << ToolName << ": Error opening '" << Filename << "': "; + perror(0); + exit(1); + } + + // Keep reading packets until we run out of them. + unsigned PacketType; + while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { + // If the low eight bits of the packet are zero, we must be dealing with an + // endianness mismatch. Byteswap all words read from the profiling + // information. This can happen when the compiler host and target have + // different endianness. + bool ShouldByteSwap = (char)PacketType == 0; + PacketType = ByteSwap(PacketType, ShouldByteSwap); + + switch (PacketType) { + case ArgumentInfo: + ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines); + break; + + case EdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); + break; + + default: + errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n"; + exit(1); + } + } + + fclose(F); +} diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp new file mode 100644 index 0000000..2a61a0b --- /dev/null +++ b/lib/Analysis/ProfileDataLoaderPass.cpp @@ -0,0 +1,188 @@ +//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass loads profiling data from a dump file and sets branch weight +// metadata. +// +// TODO: Replace all "profile-metadata-loader" strings with "profile-loader" +// once ProfileInfo etc. has been removed. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-metadata-loader" +#include "llvm/BasicBlock.h" +#include "llvm/InstrTypes.h" +#include "llvm/Module.h" +#include "llvm/LLVMContext.h" +#include "llvm/MDBuilder.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileDataLoader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/Statistic.h" +#include <vector> +using namespace llvm; + +STATISTIC(NumEdgesRead, "The # of edges read."); +STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated."); + +static cl::opt<std::string> +ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Profile file loaded by -profile-metadata-loader")); + +namespace { + /// This pass loads profiling data from a dump file and sets branch weight + /// metadata. + class ProfileMetadataLoaderPass : public ModulePass { + std::string Filename; + public: + static char ID; // Class identification, replacement for typeinfo + explicit ProfileMetadataLoaderPass(const std::string &filename = "") + : ModulePass(ID), Filename(filename) { + initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry()); + if (filename.empty()) Filename = ProfileMetadataFilename; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual const char *getPassName() const { + return "Profile loader"; + } + + virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge, + std::vector<unsigned>&); + virtual unsigned matchEdges(Module&, ProfileData&, std::vector<unsigned>&); + virtual void setBranchWeightMetadata(Module&, ProfileData&); + + virtual bool runOnModule(Module &M); + }; +} // End of anonymous namespace + +char ProfileMetadataLoaderPass::ID = 0; +INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader", + "Load profile information from llvmprof.out", false, true) +INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader", + "Load profile information from llvmprof.out", false, true) + +char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID; + +/// createProfileMetadataLoaderPass - This function returns a Pass that loads +/// the profiling information for the module from the specified filename, +/// making it available to the optimizers. +ModulePass *llvm::createProfileMetadataLoaderPass() { + return new ProfileMetadataLoaderPass(); +} +ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) { + return new ProfileMetadataLoaderPass(Filename); +} + +/// readEdge - Take the value from a profile counter and assign it to an edge. +void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount, + ProfileData &PB, ProfileData::Edge e, + std::vector<unsigned> &Counters) { + if (ReadCount < Counters.size()) { + unsigned weight = Counters[ReadCount]; + assert(weight != ProfileDataLoader::Uncounted); + PB.addEdgeWeight(e, weight); + + DEBUG(dbgs() << "-- Read Edge Counter for " << e + << " (# "<< (ReadCount) << "): " + << PB.getEdgeWeight(e) << "\n"); + } +} + +/// matchEdges - Link every profile counter with an edge. +unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB, + std::vector<unsigned> &Counters) { + if (Counters.size() == 0) return 0; + + unsigned ReadCount = 0; + + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n"); + readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)), + Counters); + } + } + } + + return ReadCount; +} + +/// setBranchWeightMetadata - Translate the counter values associated with each +/// edge into branch weights for each conditional branch (a branch with 2 or +/// more desinations). +void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M, + ProfileData &PB) { + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n"); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + unsigned NumSuccessors = TI->getNumSuccessors(); + + // If there is only one successor then we can not set a branch + // probability as the target is certain. + if (NumSuccessors < 2) continue; + + // Load the weights of all edges leading from this terminator. + DEBUG(dbgs() << "-- Terminator with " << NumSuccessors + << " successors:\n"); + std::vector<uint32_t> Weights(NumSuccessors); + for (unsigned s = 0 ; s < NumSuccessors ; ++s) { + ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s)); + Weights[s] = (uint32_t)PB.getEdgeWeight(edge); + DEBUG(dbgs() << "---- Edge '" << edge << "' has weight " + << Weights[s] << "\n"); + } + + // Set branch weight metadata. This will set branch probabilities of + // 100%/0% if that is true of the dynamic execution. + // BranchProbabilityInfo can account for this when it loads this metadata + // (it gives the unexectuted branch a weight of 1 for the purposes of + // probability calculations). + MDBuilder MDB(TI->getContext()); + MDNode *Node = MDB.createBranchWeights(Weights); + TI->setMetadata(LLVMContext::MD_prof, Node); + NumTermsAnnotated++; + } + } +} + +bool ProfileMetadataLoaderPass::runOnModule(Module &M) { + ProfileDataLoader PDL("profile-data-loader", Filename); + ProfileData PB; + + std::vector<unsigned> Counters = PDL.getRawEdgeCounts(); + + unsigned ReadCount = matchEdges(M, PB, Counters); + + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + + setBranchWeightMetadata(M, PB); + + return ReadCount > 0; +} diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 173de2c..b5b7ac1 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -1016,40 +1016,14 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { } } -raw_ostream& operator<<(raw_ostream &O, const Function *F) { - return O << F->getName(); -} - raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { return O << MF->getFunction()->getName() << "(MF)"; } -raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { - return O << BB->getName(); -} - raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { return O << MBB->getBasicBlock()->getName() << "(MB)"; } -raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) { - O << "("; - - if (E.first) - O << E.first; - else - O << "0"; - - O << ","; - - if (E.second) - O << E.second; - else - O << "0"; - - return O << ")"; -} - raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) { O << "("; diff --git a/test/Analysis/Profiling/load-branch-weights-ifs.ll b/test/Analysis/Profiling/load-branch-weights-ifs.ll new file mode 100644 index 0000000..b5ee2f3 --- /dev/null +++ b/test/Analysis/Profiling/load-branch-weights-ifs.ll @@ -0,0 +1,119 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f %t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN: -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN: | FileCheck %s +; RUN: rm -f %t1.prof_data + +;; func_mod - Branch taken 6 times in 7. +define i32 @func_mod(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %rem = srem i32 %0, 7 + %tobool = icmp ne i32 %rem, 0 + br i1 %tobool, label %if.then, label %if.else +; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof !0 + +if.then: + store i32 1, i32* %retval + br label %return + +if.else: + store i32 0, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +;; func_const_true - conditional branch which 100% taken probability. +define i32 @func_const_true(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end +; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + store i32 1, i32* %retval + br label %return + +if.end: + store i32 0, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +;; func_const_true - conditional branch which 100% not-taken probability. +define i32 @func_const_false(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %cmp = icmp eq i32 %0, 1 + br i1 %cmp, label %if.then, label %if.end +; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !2 + +if.then: + store i32 1, i32* %retval + br label %return + +if.end: + store i32 0, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + %loop = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %cmp = icmp slt i32 %0, 7000 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !3 + +for.body: + %1 = load i32* %loop, align 4 + %call = call i32 @func_mod(i32 %1) + br label %for.inc + +for.inc: + %2 = load i32* %loop, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + %call1 = call i32 @func_const_true(i32 1) + %call2 = call i32 @func_const_false(i32 0) + ret i32 0 +} + +; CHECK: !0 = metadata !{metadata !"branch_weights", i32 6000, i32 1000} +; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 0} +; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1} +; CHECK: !3 = metadata !{metadata !"branch_weights", i32 7000, i32 1} +; CHECK-NOT: !4 diff --git a/test/Analysis/Profiling/load-branch-weights-loops.ll b/test/Analysis/Profiling/load-branch-weights-loops.ll new file mode 100644 index 0000000..26c89b7 --- /dev/null +++ b/test/Analysis/Profiling/load-branch-weights-loops.ll @@ -0,0 +1,185 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f %t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN: -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN: | FileCheck %s +; RUN: rm -f %t1.prof_data + +;; func_for - Test branch probabilities for a vanilla for loop. +define i32 @func_for(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %1 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !0 + +for.body: + %2 = load i32* %N.addr, align 4 + %3 = load i32* %ret, align 4 + %add = add nsw i32 %3, %2 + store i32 %add, i32* %ret, align 4 + br label %for.inc + +for.inc: + %4 = load i32* %loop, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + %5 = load i32* %ret, align 4 + ret i32 %5 +} + +;; func_for_odd - Test branch probabilities for a for loop with a continue and +;; a break. +define i32 @func_for_odd(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %1 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !1 + +for.body: + %2 = load i32* %loop, align 4 + %rem = srem i32 %2, 10 + %tobool = icmp ne i32 %rem, 0 + br i1 %tobool, label %if.then, label %if.end +; CHECK: br i1 %tobool, label %if.then, label %if.end, !prof !2 + +if.then: + br label %for.inc + +if.end: + %3 = load i32* %loop, align 4 + %cmp1 = icmp eq i32 %3, 500 + br i1 %cmp1, label %if.then2, label %if.end3 +; CHECK: br i1 %cmp1, label %if.then2, label %if.end3, !prof !3 + +if.then2: + br label %for.end + +if.end3: + %4 = load i32* %N.addr, align 4 + %5 = load i32* %ret, align 4 + %add = add nsw i32 %5, %4 + store i32 %add, i32* %ret, align 4 + br label %for.inc + +for.inc: + %6 = load i32* %loop, align 4 + %inc = add nsw i32 %6, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + %7 = load i32* %ret, align 4 + ret i32 %7 +} + +;; func_while - Test branch probability in a vanilla while loop. +define i32 @func_while(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %while.cond + +while.cond: + %0 = load i32* %loop, align 4 + %1 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %while.body, label %while.end +; CHECK: br i1 %cmp, label %while.body, label %while.end, !prof !0 + +while.body: + %2 = load i32* %N.addr, align 4 + %3 = load i32* %ret, align 4 + %add = add nsw i32 %3, %2 + store i32 %add, i32* %ret, align 4 + %4 = load i32* %loop, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* %loop, align 4 + br label %while.cond + +while.end: + %5 = load i32* %ret, align 4 + ret i32 %5 +} + +;; func_while - Test branch probability in a vanilla do-while loop. +define i32 @func_do_while(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %do.body + +do.body: + %0 = load i32* %N.addr, align 4 + %1 = load i32* %ret, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* %ret, align 4 + %2 = load i32* %loop, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %loop, align 4 + br label %do.cond + +do.cond: + %3 = load i32* %loop, align 4 + %4 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %3, %4 + br i1 %cmp, label %do.body, label %do.end +; CHECK: br i1 %cmp, label %do.body, label %do.end, !prof !4 + +do.end: + %5 = load i32* %ret, align 4 + ret i32 %5 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 0, i32* %retval + %call = call i32 @func_for(i32 1000) + %call1 = call i32 @func_for_odd(i32 1000) + %call2 = call i32 @func_while(i32 1000) + %call3 = call i32 @func_do_while(i32 1000) + ret i32 0 +} + +!0 = metadata !{metadata !"branch_weights", i32 1000, i32 1} +!1 = metadata !{metadata !"branch_weights", i32 501, i32 0} +!2 = metadata !{metadata !"branch_weights", i32 450, i32 51} +!3 = metadata !{metadata !"branch_weights", i32 1, i32 50} +!4 = metadata !{metadata !"branch_weights", i32 999, i32 1} +; CHECK-NOT: !5 diff --git a/test/Analysis/Profiling/load-branch-weights-switches.ll b/test/Analysis/Profiling/load-branch-weights-switches.ll new file mode 100644 index 0000000..9efe6d0 --- /dev/null +++ b/test/Analysis/Profiling/load-branch-weights-switches.ll @@ -0,0 +1,162 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f %t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN: -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN: | FileCheck %s +; RUN: rm -f %t1.prof_data + +;; func_switch - Test branch probabilities for a switch instruction with an +;; even chance of taking each case (or no case). +define i32 @func_switch(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %rem = srem i32 %0, 4 + switch i32 %rem, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +; CHECK: ], !prof !0 + +sw.bb: + store i32 5, i32* %retval + br label %return + +sw.bb1: + store i32 6, i32* %retval + br label %return + +sw.bb2: + store i32 7, i32* %retval + br label %return + +sw.epilog: + store i32 8, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +;; func_switch_switch - Test branch probabilities in a switch-instruction that +;; leads to further switch instructions. The first-tier switch occludes some +;; possibilities in the second-tier switches, leading to some branches having a +;; 0 probability. +define i32 @func_switch_switch(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %rem = srem i32 %0, 2 + switch i32 %rem, label %sw.default11 [ + i32 0, label %sw.bb + i32 1, label %sw.bb5 + ] +; CHECK: ], !prof !1 + +sw.bb: + %1 = load i32* %N.addr, align 4 + %rem1 = srem i32 %1, 4 + switch i32 %rem1, label %sw.default [ + i32 0, label %sw.bb2 + i32 1, label %sw.bb3 + i32 2, label %sw.bb4 + ] +; CHECK: ], !prof !2 + +sw.bb2: + store i32 5, i32* %retval + br label %return + +sw.bb3: + store i32 6, i32* %retval + br label %return + +sw.bb4: + store i32 7, i32* %retval + br label %return + +sw.default: + store i32 8, i32* %retval + br label %return + +sw.bb5: + %2 = load i32* %N.addr, align 4 + %rem6 = srem i32 %2, 4 + switch i32 %rem6, label %sw.default10 [ + i32 0, label %sw.bb7 + i32 1, label %sw.bb8 + i32 2, label %sw.bb9 + ] +; CHECK: ], !prof !3 + +sw.bb7: + store i32 9, i32* %retval + br label %return + +sw.bb8: + store i32 10, i32* %retval + br label %return + +sw.bb9: + store i32 11, i32* %retval + br label %return + +sw.default10: + store i32 12, i32* %retval + br label %return + +sw.default11: + store i32 13, i32* %retval + br label %return + +return: + %3 = load i32* %retval + ret i32 %3 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + %loop = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %cmp = icmp slt i32 %0, 4000 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !4 + +for.body: + %1 = load i32* %loop, align 4 + %call = call i32 @func_switch(i32 %1) + %2 = load i32* %loop, align 4 + %call1 = call i32 @func_switch_switch(i32 %2) + br label %for.inc + +for.inc: + %3 = load i32* %loop, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + ret i32 0 +} + +; CHECK: !0 = metadata !{metadata !"branch_weights", i32 1000, i32 1000, i32 1000, i32 1000} +; CHECK: !1 = metadata !{metadata !"branch_weights", i32 0, i32 2000, i32 2000} +; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1000, i32 0, i32 1000} +; CHECK: !3 = metadata !{metadata !"branch_weights", i32 1000, i32 0, i32 1000, i32 0} +; CHECK: !4 = metadata !{metadata !"branch_weights", i32 4000, i32 1} +; CHECK-NOT: !5 |