aboutsummaryrefslogtreecommitdiffstats
path: root/lib/ProfileData
diff options
context:
space:
mode:
author Stephen Hines <srhines@google.com> 2014-12-01 14:51:49 -0800
committer Stephen Hines <srhines@google.com> 2014-12-02 16:08:10 -0800
commit 37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch)
tree 8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /lib/ProfileData
parent d2327b22152ced7bc46dc629fc908959e8a52d03 (diff)
downloadexternal_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.zip
external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.gz
external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.bz2
Update aosp/master LLVM for rebase to r222494.
Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
Diffstat (limited to 'lib/ProfileData')
-rw-r--r-- lib/ProfileData/Android.mk 12
-rw-r--r-- lib/ProfileData/CMakeLists.txt 6
-rw-r--r-- lib/ProfileData/CoverageMapping.cpp 475
-rw-r--r-- lib/ProfileData/CoverageMappingReader.cpp 553
-rw-r--r-- lib/ProfileData/CoverageMappingWriter.cpp 187
-rw-r--r-- lib/ProfileData/InstrProf.cpp 6
-rw-r--r-- lib/ProfileData/InstrProfIndexed.h 8
-rw-r--r-- lib/ProfileData/InstrProfReader.cpp 100
-rw-r--r-- lib/ProfileData/InstrProfWriter.cpp 56
-rw-r--r-- lib/ProfileData/LLVMBuild.txt 2
-rw-r--r-- lib/ProfileData/SampleProf.cpp 51
-rw-r--r-- lib/ProfileData/SampleProfReader.cpp 399
-rw-r--r-- lib/ProfileData/SampleProfWriter.cpp 126
13 files changed, 1918 insertions, 63 deletions
diff --git a/lib/ProfileData/Android.mk b/lib/ProfileData/Android.mk
index f4b3fa9..1e1d5f2 100644
--- a/lib/ProfileData/Android.mk
+++ b/lib/ProfileData/Android.mk
@@ -1,9 +1,15 @@
LOCAL_PATH:= $(call my-dir)
profiledata_SRC_FILES := \
- InstrProf.cpp \
- InstrProfReader.cpp \
- InstrProfWriter.cpp \
+ CoverageMapping.cpp \
+ CoverageMappingReader.cpp \
+ CoverageMappingWriter.cpp \
+ InstrProf.cpp \
+ InstrProfReader.cpp \
+ InstrProfWriter.cpp \
+ SampleProf.cpp \
+ SampleProfReader.cpp \
+ SampleProfWriter.cpp
# For the host
# =====================================================
diff --git a/lib/ProfileData/CMakeLists.txt b/lib/ProfileData/CMakeLists.txt
index aefb16c..b9d472d 100644
--- a/lib/ProfileData/CMakeLists.txt
+++ b/lib/ProfileData/CMakeLists.txt
@@ -2,4 +2,10 @@ add_llvm_library(LLVMProfileData
InstrProf.cpp
InstrProfReader.cpp
InstrProfWriter.cpp
+ CoverageMapping.cpp
+ CoverageMappingWriter.cpp
+ CoverageMappingReader.cpp
+ SampleProf.cpp
+ SampleProfReader.cpp
+ SampleProfWriter.cpp
)
diff --git a/lib/ProfileData/CoverageMapping.cpp b/lib/ProfileData/CoverageMapping.cpp
new file mode 100644
index 0000000..0ccebc2
--- /dev/null
+++ b/lib/ProfileData/CoverageMapping.cpp
@@ -0,0 +1,475 @@
+//=-- CoverageMapping.cpp - Code coverage mapping support ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for clang's and llvm's instrumentation based
+// code coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/CoverageMapping.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ProfileData/CoverageMappingReader.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace coverage;
+
+#define DEBUG_TYPE "coverage-mapping"
+
+/// Return a counter that refers to expression \p E. An expression that was
+/// registered before is reused; otherwise it is appended to the expression
+/// list and its new index is remembered.
+Counter CounterExpressionBuilder::get(const CounterExpression &E) {
+  auto Known = ExpressionIndices.find(E);
+  if (Known == ExpressionIndices.end()) {
+    // First occurrence: the new expression takes the next free index.
+    unsigned NewIndex = Expressions.size();
+    Expressions.push_back(E);
+    ExpressionIndices[E] = NewIndex;
+    return Counter::getExpression(NewIndex);
+  }
+  return Counter::getExpression(Known->second);
+}
+
+/// Flatten the expression tree rooted at \p C into a list of
+/// (counter ID, sign) pairs appended to \p Terms. \p Sign is the sign that
+/// applies to \p C itself; it is negated for the right operand of a
+/// subtraction.
+void CounterExpressionBuilder::extractTerms(
+    Counter C, int Sign, SmallVectorImpl<std::pair<unsigned, int>> &Terms) {
+  switch (C.getKind()) {
+  case Counter::Zero:
+    // A zero counter contributes no term.
+    return;
+  case Counter::CounterValueReference:
+    Terms.push_back(std::make_pair(C.getCounterID(), Sign));
+    return;
+  case Counter::Expression: {
+    const auto &Expr = Expressions[C.getExpressionID()];
+    const int RHSSign =
+        Expr.Kind == CounterExpression::Subtract ? -Sign : Sign;
+    extractTerms(Expr.LHS, Sign, Terms);
+    extractTerms(Expr.RHS, RHSSign, Terms);
+    return;
+  }
+  }
+}
+
+/// Simplify \p ExpressionTree by flattening it into signed counter terms,
+/// merging the terms that refer to the same counter, and rebuilding the
+/// result as a chain of additions followed by subtractions. Returns the zero
+/// counter when the tree contains no counter references.
+Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
+ // Gather constant terms.
+ llvm::SmallVector<std::pair<unsigned, int>, 32> Terms;
+ extractTerms(ExpressionTree, +1, Terms);
+
+ // If there are no terms, this is just a zero. The algorithm below assumes at
+ // least one term.
+ if (Terms.size() == 0)
+ return Counter::getZero();
+
+ // Group the terms by counter ID.
+ std::sort(Terms.begin(), Terms.end(),
+ [](const std::pair<unsigned, int> &LHS,
+ const std::pair<unsigned, int> &RHS) {
+ return LHS.first < RHS.first;
+ });
+
+ // Combine terms by counter ID to eliminate counters that sum to zero.
+ // Prev points at the last merged slot: terms with the same ID are folded
+ // into it, and the first term of each new ID is copied into the next slot.
+ auto Prev = Terms.begin();
+ for (auto I = Prev + 1, E = Terms.end(); I != E; ++I) {
+ if (I->first == Prev->first) {
+ Prev->second += I->second;
+ continue;
+ }
+ ++Prev;
+ *Prev = *I;
+ }
+ // Drop the stale entries that remain after the merged prefix.
+ Terms.erase(++Prev, Terms.end());
+
+ Counter C;
+ // Create additions. We do this before subtractions to avoid constructs like
+ // ((0 - X) + Y), as opposed to (Y - X).
+ for (auto Term : Terms) {
+ if (Term.second <= 0)
+ continue;
+ // A counter with a net coefficient of N is emitted as N separate additions.
+ for (int I = 0; I < Term.second; ++I)
+ if (C.isZero())
+ C = Counter::getCounter(Term.first);
+ else
+ C = get(CounterExpression(CounterExpression::Add, C,
+ Counter::getCounter(Term.first)));
+ }
+
+ // Create subtractions.
+ for (auto Term : Terms) {
+ if (Term.second >= 0)
+ continue;
+ // A net coefficient of -N is emitted as N separate subtractions.
+ for (int I = 0; I < -Term.second; ++I)
+ C = get(CounterExpression(CounterExpression::Subtract, C,
+ Counter::getCounter(Term.first)));
+ }
+ return C;
+}
+
+/// Return the simplified counter for the sum of \p LHS and \p RHS.
+Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS) {
+  CounterExpression Sum(CounterExpression::Add, LHS, RHS);
+  Counter Unsimplified = get(Sum);
+  return simplify(Unsimplified);
+}
+
+/// Return the simplified counter for \p LHS minus \p RHS.
+Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) {
+  CounterExpression Difference(CounterExpression::Subtract, LHS, RHS);
+  Counter Unsimplified = get(Difference);
+  return simplify(Unsimplified);
+}
+
+/// Print a textual form of counter \p C to \p OS: '0' for the zero counter,
+/// '#<id>' for a counter reference, and a parenthesised "(LHS op RHS)" for an
+/// expression. When counter values are available and the counter evaluates
+/// successfully, the value is appended in square brackets.
+void CounterMappingContext::dump(const Counter &C,
+ llvm::raw_ostream &OS) const {
+ switch (C.getKind()) {
+ case Counter::Zero:
+ OS << '0';
+ // The zero counter never gets a "[value]" suffix.
+ return;
+ case Counter::CounterValueReference:
+ OS << '#' << C.getCounterID();
+ break;
+ case Counter::Expression: {
+ // An out-of-range expression ID prints nothing at all.
+ if (C.getExpressionID() >= Expressions.size())
+ return;
+ const auto &E = Expressions[C.getExpressionID()];
+ OS << '(';
+ dump(E.LHS, OS);
+ OS << (E.Kind == CounterExpression::Subtract ? " - " : " + ");
+ dump(E.RHS, OS);
+ OS << ')';
+ break;
+ }
+ }
+ // Without counter values there is nothing to evaluate.
+ if (CounterValues.empty())
+ return;
+ ErrorOr<int64_t> Value = evaluate(C);
+ if (!Value)
+ return;
+ OS << '[' << *Value << ']';
+}
+
+/// Compute the value of counter \p C from the stored counter values.
+///
+/// \returns 0 for the zero counter, the referenced value for a counter
+/// reference, and LHS + RHS or LHS - RHS for an expression. Returns
+/// std::errc::argument_out_of_domain when a counter ID or expression ID is
+/// out of range; an error from either operand is returned unchanged.
+ErrorOr<int64_t> CounterMappingContext::evaluate(const Counter &C) const {
+ switch (C.getKind()) {
+ case Counter::Zero:
+ return 0;
+ case Counter::CounterValueReference:
+ if (C.getCounterID() >= CounterValues.size())
+ return std::make_error_code(std::errc::argument_out_of_domain);
+ return CounterValues[C.getCounterID()];
+ case Counter::Expression: {
+ if (C.getExpressionID() >= Expressions.size())
+ return std::make_error_code(std::errc::argument_out_of_domain);
+ const auto &E = Expressions[C.getExpressionID()];
+ // The left operand is evaluated first; its failure short-circuits the right.
+ ErrorOr<int64_t> LHS = evaluate(E.LHS);
+ if (!LHS)
+ return LHS;
+ ErrorOr<int64_t> RHS = evaluate(E.RHS);
+ if (!RHS)
+ return RHS;
+ return E.Kind == CounterExpression::Subtract ? *LHS - *RHS : *LHS + *RHS;
+ }
+ }
+ llvm_unreachable("Unhandled CounterKind");
+}
+
+/// Advance past records whose first filename differs from the filter
+/// filename. An empty filter accepts every record. When the end of the
+/// records is reached, the iterator becomes a default-constructed one.
+void FunctionRecordIterator::skipOtherFiles() {
+  for (; Current != Records.end(); ++Current) {
+    if (Filename.empty() || Filename == Current->Filenames[0])
+      break;
+  }
+  if (Current == Records.end())
+    *this = FunctionRecordIterator();
+}
+
+/// Build a CoverageMapping by combining every record from \p CoverageReader
+/// with the counts that \p ProfileReader holds for the same function.
+///
+/// A function is counted in MismatchedFunctionCount and skipped when the
+/// profile lookup fails with hash_mismatch or unknown_function, or when one of
+/// its region counters cannot be evaluated. Any other profile lookup error is
+/// returned to the caller.
+ErrorOr<std::unique_ptr<CoverageMapping>>
+CoverageMapping::load(ObjectFileCoverageMappingReader &CoverageReader,
+ IndexedInstrProfReader &ProfileReader) {
+ auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
+
+ // Reused across iterations; cleared at the start of each record.
+ std::vector<uint64_t> Counts;
+ for (const auto &Record : CoverageReader) {
+ Counts.clear();
+ if (std::error_code EC = ProfileReader.getFunctionCounts(
+ Record.FunctionName, Record.FunctionHash, Counts)) {
+ if (EC != instrprof_error::hash_mismatch &&
+ EC != instrprof_error::unknown_function)
+ return EC;
+ Coverage->MismatchedFunctionCount++;
+ continue;
+ }
+
+ // NOTE(review): Counts.front() below relies on this assert; with asserts
+ // disabled an empty Counts would not be caught here.
+ assert(Counts.size() != 0 && "Function's counts are empty");
+ FunctionRecord Function(Record.FunctionName, Record.Filenames,
+ Counts.front());
+ CounterMappingContext Ctx(Record.Expressions, Counts);
+ for (const auto &Region : Record.MappingRegions) {
+ ErrorOr<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
+ if (!ExecutionCount)
+ break;
+ Function.CountedRegions.push_back(CountedRegion(Region, *ExecutionCount));
+ }
+ // A size mismatch means the loop above stopped early on an evaluation error.
+ if (Function.CountedRegions.size() != Record.MappingRegions.size()) {
+ Coverage->MismatchedFunctionCount++;
+ continue;
+ }
+
+ Coverage->Functions.push_back(std::move(Function));
+ }
+
+ return std::move(Coverage);
+}
+
+/// Convenience overload: open \p ObjectFilename (or stdin) as the coverage
+/// mapping source and \p ProfileFilename as an indexed profile, then delegate
+/// to the reader-based load(). The first error from opening the buffer,
+/// reading the coverage header, or creating the profile reader is returned.
+ErrorOr<std::unique_ptr<CoverageMapping>>
+CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename) {
+ auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename);
+ if (auto EC = CounterMappingBuff.getError())
+ return EC;
+ ObjectFileCoverageMappingReader CoverageReader(CounterMappingBuff.get());
+ if (auto EC = CoverageReader.readHeader())
+ return EC;
+ std::unique_ptr<IndexedInstrProfReader> ProfileReader;
+ if (auto EC = IndexedInstrProfReader::create(ProfileFilename, ProfileReader))
+ return EC;
+ return load(CoverageReader, *ProfileReader);
+}
+
+namespace {
+/// \brief Distributes functions into instantiation sets.
+///
+/// An instantiation set is a collection of functions that have the same source
+/// code, i.e. specializations of a template function. Functions are grouped by
+/// the start location of their first region in the requested file.
+class FunctionInstantiationSetCollector {
+ // Maps a region start location to the functions whose first region in the
+ // requested file starts there.
+ typedef DenseMap<std::pair<unsigned, unsigned>,
+ std::vector<const FunctionRecord *>> MapT;
+ MapT InstantiatedFunctions;
+
+public:
+ /// Add \p Function to the set keyed by the start location of its first
+ /// counted region whose file ID equals \p FileID. The function must have at
+ /// least one such region (asserted). Only a pointer to \p Function is stored.
+ void insert(const FunctionRecord &Function, unsigned FileID) {
+ auto I = Function.CountedRegions.begin(), E = Function.CountedRegions.end();
+ while (I != E && I->FileID != FileID)
+ ++I;
+ assert(I != E && "function does not cover the given file");
+ auto &Functions = InstantiatedFunctions[I->startLoc()];
+ Functions.push_back(&Function);
+ }
+
+ /// Iterate over the collected (start location, functions) entries.
+ MapT::iterator begin() { return InstantiatedFunctions.begin(); }
+
+ MapT::iterator end() { return InstantiatedFunctions.end(); }
+};
+
+/// Converts a sorted list of counted regions into a flat list of coverage
+/// segments, tracking the regions that are currently open on a stack.
+class SegmentBuilder {
+  /// The segments produced so far, in the order they were started.
+  std::vector<CoverageSegment> Segments;
+  /// The regions that have been entered but not yet popped, innermost last.
+  SmallVector<const CountedRegion *, 8> ActiveRegions;
+
+  /// Start a segment with no count specified.
+  void startSegment(unsigned Line, unsigned Col) {
+    DEBUG(dbgs() << "Top level segment at " << Line << ":" << Col << "\n");
+    Segments.emplace_back(Line, Col, /*IsRegionEntry=*/false);
+  }
+
+  /// Start a segment with the given Region's count.
+  ///
+  /// If the most recent segment already starts at \p Line : \p Col it is
+  /// reused instead of creating an empty segment; in that case
+  /// \p IsRegionEntry is not applied to it.
+  void startSegment(unsigned Line, unsigned Col, bool IsRegionEntry,
+                    const CountedRegion &Region) {
+    // Avoid creating empty regions: only open a new segment when there is
+    // none yet or the last one starts somewhere else. The fields are compared
+    // in place rather than through a copy of the last segment.
+    if (Segments.empty() || Segments.back().Line != Line ||
+        Segments.back().Col != Col)
+      Segments.emplace_back(Line, Col, IsRegionEntry);
+    DEBUG(dbgs() << "Segment at " << Line << ":" << Col);
+    // Set this region's count. Skipped regions leave the segment's count
+    // untouched.
+    if (Region.Kind != coverage::CounterMappingRegion::SkippedRegion) {
+      DEBUG(dbgs() << " with count " << Region.ExecutionCount);
+      Segments.back().setCount(Region.ExecutionCount);
+    }
+    DEBUG(dbgs() << "\n");
+  }
+
+  /// Start a segment for the given region.
+  void startSegment(const CountedRegion &Region) {
+    startSegment(Region.LineStart, Region.ColumnStart, true, Region);
+  }
+
+  /// Pop the top region off of the active stack, starting a new segment with
+  /// the containing Region's count.
+  void popRegion() {
+    const CountedRegion *Active = ActiveRegions.back();
+    unsigned Line = Active->LineEnd, Col = Active->ColumnEnd;
+    ActiveRegions.pop_back();
+    if (ActiveRegions.empty())
+      startSegment(Line, Col);
+    else
+      startSegment(Line, Col, false, *ActiveRegions.back());
+  }
+
+public:
+  /// Build a list of CoverageSegments from a sorted list of Regions.
+  ///
+  /// The builder stores pointers into \p Regions while it runs, so the array
+  /// must stay alive for the duration of the call.
+  std::vector<CoverageSegment> buildSegments(ArrayRef<CountedRegion> Regions) {
+    for (const auto &Region : Regions) {
+      // Pop any regions that end before this one starts.
+      while (!ActiveRegions.empty() &&
+             ActiveRegions.back()->endLoc() <= Region.startLoc())
+        popRegion();
+      if (!Segments.empty() && Segments.back().Line == Region.LineStart &&
+          Segments.back().Col == Region.ColumnStart) {
+        // A region that starts exactly where the last segment starts adds
+        // its count to that segment and is not pushed on the active stack.
+        if (Region.Kind != coverage::CounterMappingRegion::SkippedRegion)
+          Segments.back().addCount(Region.ExecutionCount);
+      } else {
+        // Add this region to the stack.
+        ActiveRegions.push_back(&Region);
+        startSegment(Region);
+      }
+    }
+    // Pop any regions that are left in the stack.
+    while (!ActiveRegions.empty())
+      popRegion();
+    return Segments;
+  }
+};
+}
+
+/// Return the sorted list of distinct filenames referenced by the covered
+/// functions.
+std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
+  std::vector<StringRef> Names;
+  for (const auto &Function : getCoveredFunctions()) {
+    for (const auto &Name : Function.Filenames)
+      Names.push_back(Name);
+  }
+  // Sort first so that duplicates become adjacent, then drop them.
+  std::sort(Names.begin(), Names.end());
+  Names.erase(std::unique(Names.begin(), Names.end()), Names.end());
+  return Names;
+}
+
+/// Find the file ID in \p Function that is named \p SourceFile and is not the
+/// target of an expansion region located in a file with that same name.
+/// Returns the lowest such ID, or None when there is none.
+static Optional<unsigned> findMainViewFileID(StringRef SourceFile,
+                                             const FunctionRecord &Function) {
+  const unsigned NumFiles = Function.Filenames.size();
+  llvm::SmallVector<bool, 8> IsExpandedFile(Function.Filenames.size(), false);
+  llvm::SmallVector<bool, 8> MatchesSource(Function.Filenames.size(), false);
+  for (unsigned I = 0; I != NumFiles; ++I) {
+    if (SourceFile == Function.Filenames[I])
+      MatchesSource[I] = true;
+  }
+  // Mark every file that is expanded from a file named SourceFile.
+  for (const auto &CR : Function.CountedRegions) {
+    if (CR.Kind != CounterMappingRegion::ExpansionRegion)
+      continue;
+    if (MatchesSource[CR.FileID])
+      IsExpandedFile[CR.ExpandedFileID] = true;
+  }
+  for (unsigned I = 0; I != NumFiles; ++I) {
+    if (!MatchesSource[I] || IsExpandedFile[I])
+      continue;
+    return I;
+  }
+  return None;
+}
+
+/// Find the lowest file ID in \p Function that is not the target of any
+/// expansion region. Returns None when every file is an expansion target or
+/// the function has no files.
+static Optional<unsigned> findMainViewFileID(const FunctionRecord &Function) {
+  const unsigned NumFiles = Function.Filenames.size();
+  llvm::SmallVector<bool, 8> IsExpandedFile(Function.Filenames.size(), false);
+  for (const auto &CR : Function.CountedRegions) {
+    if (CR.Kind != CounterMappingRegion::ExpansionRegion)
+      continue;
+    IsExpandedFile[CR.ExpandedFileID] = true;
+  }
+  for (unsigned I = 0; I != NumFiles; ++I) {
+    if (IsExpandedFile[I])
+      continue;
+    return I;
+  }
+  return None;
+}
+
+/// Collect the IDs of all files in \p Function whose name equals
+/// \p SourceFile.
+static SmallSet<unsigned, 8> gatherFileIDs(StringRef SourceFile,
+                                           const FunctionRecord &Function) {
+  SmallSet<unsigned, 8> Matching;
+  unsigned Index = 0;
+  for (const auto &Name : Function.Filenames) {
+    if (SourceFile == Name)
+      Matching.insert(Index);
+    ++Index;
+  }
+  return Matching;
+}
+
+/// Sort a nested sequence of regions from a single file.
+///
+/// Regions are ordered by start location. Among regions that start at the same
+/// location, the one that ends later comes first, so an enclosing region
+/// precedes the regions it contains.
+template <class It> static void sortNestedRegions(It First, It Last) {
+  auto OuterBeforeInner = [](const CountedRegion &LHS,
+                             const CountedRegion &RHS) {
+    if (LHS.startLoc() != RHS.startLoc())
+      return LHS.startLoc() < RHS.startLoc();
+    // Same start: the region with the later end contains the other one.
+    return RHS.endLoc() < LHS.endLoc();
+  };
+  std::sort(First, Last, OuterBeforeInner);
+}
+
+/// Return true when \p R is an expansion region located in file \p FileID.
+static bool isExpansion(const CountedRegion &R, unsigned FileID) {
+  if (R.FileID != FileID)
+    return false;
+  return R.Kind == CounterMappingRegion::ExpansionRegion;
+}
+
+/// Gather the coverage data for \p Filename across all loaded functions.
+///
+/// Functions for which findMainViewFileID() finds no main file ID are skipped.
+/// For the remaining functions, every region whose file is named \p Filename
+/// contributes to the segments; expansion regions are recorded only when they
+/// lie in the function's main file ID.
+CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) {
+ CoverageData FileCoverage(Filename);
+ std::vector<coverage::CountedRegion> Regions;
+
+ for (const auto &Function : Functions) {
+ auto MainFileID = findMainViewFileID(Filename, Function);
+ if (!MainFileID)
+ continue;
+ // Several file IDs of one function may carry the same filename.
+ auto FileIDs = gatherFileIDs(Filename, Function);
+ for (const auto &CR : Function.CountedRegions)
+ if (FileIDs.count(CR.FileID)) {
+ Regions.push_back(CR);
+ if (isExpansion(CR, *MainFileID))
+ FileCoverage.Expansions.emplace_back(CR, Function);
+ }
+ }
+
+ // buildSegments() expects its input sorted.
+ sortNestedRegions(Regions.begin(), Regions.end());
+ FileCoverage.Segments = SegmentBuilder().buildSegments(Regions);
+
+ return FileCoverage;
+}
+
+/// Return the functions that belong to an instantiation set of two or more
+/// members for \p Filename, i.e. functions whose first region in their main
+/// file starts at the same location as that of another function.
+std::vector<const FunctionRecord *>
+CoverageMapping::getInstantiations(StringRef Filename) {
+  FunctionInstantiationSetCollector Collector;
+  for (const auto &Function : Functions) {
+    auto MainFileID = findMainViewFileID(Filename, Function);
+    if (!MainFileID)
+      continue;
+    Collector.insert(Function, *MainFileID);
+  }
+
+  std::vector<const FunctionRecord *> Result;
+  for (const auto &InstantiationSet : Collector) {
+    const auto &Members = InstantiationSet.second;
+    // A set with a single member is not an instantiation group.
+    if (Members.size() < 2)
+      continue;
+    Result.insert(Result.end(), Members.begin(), Members.end());
+  }
+  return Result;
+}
+
+/// Gather the coverage data for a single function, restricted to the regions
+/// in its main view file (the first file that is not an expansion target).
+/// Returns a default-constructed CoverageData when no such file exists.
+CoverageData
+CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) {
+ auto MainFileID = findMainViewFileID(Function);
+ if (!MainFileID)
+ return CoverageData();
+
+ CoverageData FunctionCoverage(Function.Filenames[*MainFileID]);
+ std::vector<coverage::CountedRegion> Regions;
+ for (const auto &CR : Function.CountedRegions)
+ if (CR.FileID == *MainFileID) {
+ Regions.push_back(CR);
+ if (isExpansion(CR, *MainFileID))
+ FunctionCoverage.Expansions.emplace_back(CR, Function);
+ }
+
+ // buildSegments() expects its input sorted.
+ sortNestedRegions(Regions.begin(), Regions.end());
+ FunctionCoverage.Segments = SegmentBuilder().buildSegments(Regions);
+
+ return FunctionCoverage;
+}
+
+/// Gather the coverage data for the file identified by \p Expansion.FileID
+/// within \p Expansion.Function. Expansion regions found in that file are
+/// recorded as well, so nested expansions can be followed by the caller.
+CoverageData
+CoverageMapping::getCoverageForExpansion(const ExpansionRecord &Expansion) {
+ CoverageData ExpansionCoverage(
+ Expansion.Function.Filenames[Expansion.FileID]);
+ std::vector<coverage::CountedRegion> Regions;
+ for (const auto &CR : Expansion.Function.CountedRegions)
+ if (CR.FileID == Expansion.FileID) {
+ Regions.push_back(CR);
+ if (isExpansion(CR, Expansion.FileID))
+ ExpansionCoverage.Expansions.emplace_back(CR, Expansion.Function);
+ }
+
+ // buildSegments() expects its input sorted.
+ sortNestedRegions(Regions.begin(), Regions.end());
+ ExpansionCoverage.Segments = SegmentBuilder().buildSegments(Regions);
+
+ return ExpansionCoverage;
+}
diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp
new file mode 100644
index 0000000..6476d28
--- /dev/null
+++ b/lib/ProfileData/CoverageMappingReader.cpp
@@ -0,0 +1,553 @@
+//=-- CoverageMappingReader.cpp - Code coverage mapping reader ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading coverage mapping data for
+// instrumentation based coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/CoverageMappingReader.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+using namespace coverage;
+using namespace object;
+
+#define DEBUG_TYPE "coverage-mapping"
+
+void CoverageMappingIterator::increment() {
+ // Check if all the records were read or if an error occurred while reading
+ // the next record.
+ if (Reader->readNextRecord(Record))
+ *this = CoverageMappingIterator();
+}
+
+/// Decode one ULEB128 value from the front of Data into \p Result and advance
+/// Data past it.
+///
+/// \returns truncated when Data is empty, malformed when the encoded value
+/// claims more bytes than Data holds, and success otherwise.
+std::error_code RawCoverageReader::readULEB128(uint64_t &Result) {
+ if (Data.size() < 1)
+ return error(instrprof_error::truncated);
+ unsigned N = 0;
+ // NOTE(review): decodeULEB128 is not given an end-of-buffer bound here; the
+ // length check below only runs after decoding. Confirm the buffer cannot end
+ // in the middle of a value.
+ Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+ if (N > Data.size())
+ return error(instrprof_error::malformed);
+ Data = Data.substr(N);
+ return success();
+}
+
+/// Read a ULEB128 value into \p Result and require it to be strictly less
+/// than \p MaxPlus1; larger values are reported as malformed.
+std::error_code RawCoverageReader::readIntMax(uint64_t &Result,
+                                              uint64_t MaxPlus1) {
+  std::error_code Err = readULEB128(Result);
+  if (Err)
+    return Err;
+  if (Result < MaxPlus1)
+    return success();
+  return error(instrprof_error::malformed);
+}
+
+/// Read a ULEB128 size into \p Result. As a sanity check, the size may not
+/// exceed the number of bytes left in Data; otherwise malformed is returned.
+std::error_code RawCoverageReader::readSize(uint64_t &Result) {
+  std::error_code Err = readULEB128(Result);
+  if (Err)
+    return Err;
+  if (Result <= Data.size())
+    return success();
+  return error(instrprof_error::malformed);
+}
+
+/// Read a length-prefixed string: a ULEB128 length followed by that many
+/// bytes. \p Result refers to the bytes inside Data; nothing is copied.
+std::error_code RawCoverageReader::readString(StringRef &Result) {
+  uint64_t NumBytes;
+  std::error_code Err = readSize(NumBytes);
+  if (Err)
+    return Err;
+  // readSize() has already checked that NumBytes fits in the remaining data.
+  Result = Data.substr(0, NumBytes);
+  Data = Data.substr(NumBytes);
+  return success();
+}
+
+/// Read the filename table: a ULEB128 count followed by that many
+/// length-prefixed strings, each appended to Filenames. Names read before an
+/// error stay in Filenames.
+std::error_code RawCoverageFilenamesReader::read() {
+  uint64_t Remaining;
+  if (auto Err = readSize(Remaining))
+    return Err;
+  for (; Remaining != 0; --Remaining) {
+    StringRef Name;
+    if (auto Err = readString(Name))
+      return Err;
+    Filenames.push_back(Name);
+  }
+  return success();
+}
+
+/// Decode the encoded counter \p Value into \p C.
+///
+/// The low bits selected by Counter::EncodingTagMask hold the tag and the
+/// remaining bits hold the counter or expression ID. For expression tags this
+/// also records the expression kind (add or subtract) in Expressions[ID].
+///
+/// \returns malformed when an expression ID is out of range or the tag is not
+/// recognised.
+std::error_code RawCoverageMappingReader::decodeCounter(unsigned Value,
+ Counter &C) {
+ auto Tag = Value & Counter::EncodingTagMask;
+ switch (Tag) {
+ case Counter::Zero:
+ C = Counter::getZero();
+ return success();
+ case Counter::CounterValueReference:
+ C = Counter::getCounter(Value >> Counter::EncodingTagBits);
+ return success();
+ default:
+ break;
+ }
+ // The remaining tags encode the expression kind as an offset from
+ // Counter::Expression.
+ Tag -= Counter::Expression;
+ switch (Tag) {
+ case CounterExpression::Subtract:
+ case CounterExpression::Add: {
+ auto ID = Value >> Counter::EncodingTagBits;
+ if (ID >= Expressions.size())
+ return error(instrprof_error::malformed);
+ // The kind is only known from the reference, so it is written back into the
+ // expression it refers to.
+ Expressions[ID].Kind = CounterExpression::ExprKind(Tag);
+ C = Counter::getExpression(ID);
+ break;
+ }
+ default:
+ return error(instrprof_error::malformed);
+ }
+ return success();
+}
+
+/// Read one encoded counter from the data and decode it into \p C. The
+/// encoded value must be below std::numeric_limits<unsigned>::max().
+std::error_code RawCoverageMappingReader::readCounter(Counter &C) {
+  uint64_t Encoded;
+  std::error_code ReadErr =
+      readIntMax(Encoded, std::numeric_limits<unsigned>::max());
+  if (ReadErr)
+    return ReadErr;
+  std::error_code DecodeErr = decodeCounter(Encoded, C);
+  if (DecodeErr)
+    return DecodeErr;
+  return success();
+}
+
+/// The bit directly above the counter tag bits. In an encoded region whose
+/// counter tag is zero, this bit marks the region as an expansion region.
+static const unsigned EncodingExpansionRegionBit = 1
+ << Counter::EncodingTagBits;
+
+/// \brief Read the sub-array of regions for the given inferred file id.
+/// \param MappingRegions the vector that the decoded regions are appended to.
+/// \param InferredFileID the file id assigned to every region read here.
+/// \param NumFileIDs the number of file ids that are defined for this
+/// function.
+/// \returns the first read or validation error, or success.
+std::error_code RawCoverageMappingReader::readMappingRegionsSubArray(
+ std::vector<CounterMappingRegion> &MappingRegions, unsigned InferredFileID,
+ size_t NumFileIDs) {
+ uint64_t NumRegions;
+ if (auto Err = readSize(NumRegions))
+ return Err;
+ // Start lines are delta-encoded: each region stores its start line relative
+ // to the previous region in this sub-array.
+ unsigned LineStart = 0;
+ for (size_t I = 0; I < NumRegions; ++I) {
+ Counter C;
+ CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion;
+
+ // Read the combined counter + region kind.
+ uint64_t EncodedCounterAndRegion;
+ if (auto Err = readIntMax(EncodedCounterAndRegion,
+ std::numeric_limits<unsigned>::max()))
+ return Err;
+ unsigned Tag = EncodedCounterAndRegion & Counter::EncodingTagMask;
+ uint64_t ExpandedFileID = 0;
+ if (Tag != Counter::Zero) {
+ // A non-zero tag means the value is an ordinary counter for a code region.
+ if (auto Err = decodeCounter(EncodedCounterAndRegion, C))
+ return Err;
+ } else {
+ // Is it an expansion region?
+ if (EncodedCounterAndRegion & EncodingExpansionRegionBit) {
+ Kind = CounterMappingRegion::ExpansionRegion;
+ ExpandedFileID = EncodedCounterAndRegion >>
+ Counter::EncodingCounterTagAndExpansionRegionTagBits;
+ if (ExpandedFileID >= NumFileIDs)
+ return error(instrprof_error::malformed);
+ } else {
+ // Otherwise the bits above the tag fields hold the region kind.
+ switch (EncodedCounterAndRegion >>
+ Counter::EncodingCounterTagAndExpansionRegionTagBits) {
+ case CounterMappingRegion::CodeRegion:
+ // Don't do anything when we have a code region with a zero counter.
+ break;
+ case CounterMappingRegion::SkippedRegion:
+ Kind = CounterMappingRegion::SkippedRegion;
+ break;
+ default:
+ return error(instrprof_error::malformed);
+ }
+ }
+ }
+
+ // Read the source range.
+ uint64_t LineStartDelta, CodeBeforeColumnStart, NumLines, ColumnEnd;
+ if (auto Err =
+ readIntMax(LineStartDelta, std::numeric_limits<unsigned>::max()))
+ return Err;
+ if (auto Err = readULEB128(CodeBeforeColumnStart))
+ return Err;
+ // The lowest bit is the "has code before" flag; the column start is stored
+ // in the bits above it.
+ bool HasCodeBefore = CodeBeforeColumnStart & 1;
+ uint64_t ColumnStart = CodeBeforeColumnStart >>
+ CounterMappingRegion::EncodingHasCodeBeforeBits;
+ if (ColumnStart > std::numeric_limits<unsigned>::max())
+ return error(instrprof_error::malformed);
+ if (auto Err = readIntMax(NumLines, std::numeric_limits<unsigned>::max()))
+ return Err;
+ if (auto Err = readIntMax(ColumnEnd, std::numeric_limits<unsigned>::max()))
+ return Err;
+ LineStart += LineStartDelta;
+ // Adjust the column locations for the empty regions that are supposed to
+ // cover whole lines. Those regions should be encoded with the
+ // column range (1 -> std::numeric_limits<unsigned>::max()), but because
+ // the encoded std::numeric_limits<unsigned>::max() is several bytes long,
+ // we set the column range to (0 -> 0) to ensure that the column start and
+ // column end take up one byte each.
+ // The std::numeric_limits<unsigned>::max() is used to represent a column
+ // position at the end of the line without knowing the length of that line.
+ if (ColumnStart == 0 && ColumnEnd == 0) {
+ ColumnStart = 1;
+ ColumnEnd = std::numeric_limits<unsigned>::max();
+ }
+
+ DEBUG({
+ dbgs() << "Counter in file " << InferredFileID << " " << LineStart << ":"
+ << ColumnStart << " -> " << (LineStart + NumLines) << ":"
+ << ColumnEnd << ", ";
+ if (Kind == CounterMappingRegion::ExpansionRegion)
+ dbgs() << "Expands to file " << ExpandedFileID;
+ else
+ CounterMappingContext(Expressions).dump(C, dbgs());
+ dbgs() << "\n";
+ });
+
+ // The end line is stored as a line count relative to the start line.
+ MappingRegions.push_back(CounterMappingRegion(
+ C, InferredFileID, LineStart, ColumnStart, LineStart + NumLines,
+ ColumnEnd, HasCodeBefore, Kind));
+ MappingRegions.back().ExpandedFileID = ExpandedFileID;
+ }
+ return success();
+}
+
+/// Decode one function's coverage mapping into \p Record.
+///
+/// The data is read in this order: the virtual file mapping (indices into the
+/// translation unit's filenames), the counter expressions, and one sub-array
+/// of mapping regions per virtual file. The counters of expansion regions are
+/// then filled in from the regions of the files they expand to.
+///
+/// \returns the first read or validation error, or success.
+std::error_code RawCoverageMappingReader::read(CoverageMappingRecord &Record) {
+
+ // Read the virtual file mapping.
+ llvm::SmallVector<unsigned, 8> VirtualFileMapping;
+ uint64_t NumFileMappings;
+ if (auto Err = readSize(NumFileMappings))
+ return Err;
+ for (size_t I = 0; I < NumFileMappings; ++I) {
+ uint64_t FilenameIndex;
+ // Each index must refer to an existing translation unit filename.
+ if (auto Err = readIntMax(FilenameIndex, TranslationUnitFilenames.size()))
+ return Err;
+ VirtualFileMapping.push_back(FilenameIndex);
+ }
+
+ // Construct the files using unique filenames and virtual file mapping.
+ for (auto I : VirtualFileMapping) {
+ Filenames.push_back(TranslationUnitFilenames[I]);
+ }
+
+ // Read the expressions.
+ uint64_t NumExpressions;
+ if (auto Err = readSize(NumExpressions))
+ return Err;
+ // Create an array of dummy expressions that get the proper counters
+ // when the expressions are read, and the proper kinds when the counters
+ // are decoded.
+ Expressions.resize(
+ NumExpressions,
+ CounterExpression(CounterExpression::Subtract, Counter(), Counter()));
+ for (size_t I = 0; I < NumExpressions; ++I) {
+ if (auto Err = readCounter(Expressions[I].LHS))
+ return Err;
+ if (auto Err = readCounter(Expressions[I].RHS))
+ return Err;
+ }
+
+ // Read the mapping regions sub-arrays.
+ for (unsigned InferredFileID = 0, S = VirtualFileMapping.size();
+ InferredFileID < S; ++InferredFileID) {
+ if (auto Err = readMappingRegionsSubArray(MappingRegions, InferredFileID,
+ VirtualFileMapping.size()))
+ return Err;
+ }
+
+ // Set the counters for the expansion regions.
+ // i.e. Counter of expansion region = counter of the first region
+ // from the expanded file.
+ // Perform multiple passes to correctly propagate the counters through
+ // all the nested expansion regions.
+ SmallVector<CounterMappingRegion *, 8> FileIDExpansionRegionMapping;
+ FileIDExpansionRegionMapping.resize(VirtualFileMapping.size(), nullptr);
+ for (unsigned Pass = 1, S = VirtualFileMapping.size(); Pass < S; ++Pass) {
+ // First record, for each expanded file, the expansion region that targets it.
+ for (auto &R : MappingRegions) {
+ if (R.Kind != CounterMappingRegion::ExpansionRegion)
+ continue;
+ assert(!FileIDExpansionRegionMapping[R.ExpandedFileID]);
+ FileIDExpansionRegionMapping[R.ExpandedFileID] = &R;
+ }
+ // Then copy the count of the first region seen in each expanded file and
+ // clear the slot so that later regions of that file are ignored.
+ for (auto &R : MappingRegions) {
+ if (FileIDExpansionRegionMapping[R.FileID]) {
+ FileIDExpansionRegionMapping[R.FileID]->Count = R.Count;
+ FileIDExpansionRegionMapping[R.FileID] = nullptr;
+ }
+ }
+ }
+
+ Record.FunctionName = FunctionName;
+ Record.Filenames = Filenames;
+ Record.Expressions = Expressions;
+ Record.MappingRegions = MappingRegions;
+ return success();
+}
+
+ObjectFileCoverageMappingReader::ObjectFileCoverageMappingReader(
+ StringRef FileName)
+ : CurrentRecord(0) {
+ auto File = llvm::object::ObjectFile::createObjectFile(FileName);
+ if (!File)
+ error(File.getError());
+ else
+ Object = std::move(File.get());
+}
+
+namespace {
+/// \brief The coverage mapping data for a single function.
+/// It points to the function's name.
+///
+/// This struct is overlaid directly on the bytes of the coverage mapping
+/// section, so the field order and types define the on-disk layout.
+template <typename IntPtrT> struct CoverageMappingFunctionRecord {
+ // Address of the function's name; resolved against the profile names section.
+ IntPtrT FunctionNamePtr;
+ // Length of the function's name in bytes.
+ uint32_t FunctionNameSize;
+ // Size in bytes of this function's encoded coverage mapping.
+ uint32_t CoverageMappingSize;
+ // Hash passed along with the name when the record is created.
+ uint64_t FunctionHash;
+};
+
+/// \brief The coverage mapping data for a single translation unit.
+/// It points to the array of function coverage mapping records and the encoded
+/// filenames array.
+///
+/// This struct is overlaid directly on the bytes of the coverage mapping
+/// section, so the field order and types define the on-disk layout.
+template <typename IntPtrT> struct CoverageMappingTURecord {
+ // Number of function records that follow this header.
+ uint32_t FunctionRecordsSize;
+ // Size in bytes of the encoded filenames that follow the function records.
+ uint32_t FilenamesSize;
+ // Size in bytes of the encoded coverage mappings that follow the filenames.
+ uint32_t CoverageMappingsSize;
+ // Coverage mapping format version; checked against CoverageMappingVersion1.
+ uint32_t Version;
+};
+
+/// \brief A helper structure to access the data from a section
+/// in an object file.
+struct SectionData {
+  /// The raw contents of the section.
+  StringRef Data;
+  /// The address at which the section's first byte is located.
+  uint64_t Address;
+
+  /// Load the contents and address of \p Section.
+  /// \returns the error from reading the contents, or success.
+  std::error_code load(SectionRef &Section) {
+    if (auto Err = Section.getContents(Data))
+      return Err;
+    Address = Section.getAddress();
+    return instrprof_error::success;
+  }
+
+  /// Fetch the \p Size bytes located at address \p Pointer.
+  ///
+  /// \param Pointer an address inside the section.
+  /// \param Size the number of bytes requested.
+  /// \param Result set to the requested bytes on success; it refers to Data
+  /// and nothing is copied.
+  /// \returns malformed when the requested range is not fully inside the
+  /// section, success otherwise.
+  std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) {
+    if (Pointer < Address)
+      return instrprof_error::malformed;
+    uint64_t Offset = Pointer - Address;
+    // Check the range without forming Offset + Size: that sum can wrap around
+    // for a huge Pointer or Size and would then wrongly pass the bounds check.
+    if (Offset > Data.size() || Size > Data.size() - Offset)
+      return instrprof_error::malformed;
+    Result = Data.substr(Offset, Size);
+    return instrprof_error::success;
+  }
+};
+}
+
+/// Parse the coverage mapping section \p Data into \p Records.
+///
+/// The section is a sequence of translation unit blocks. Each block consists
+/// of a CoverageMappingTURecord header, an array of function records, the
+/// encoded filenames, and the encoded coverage mappings, in that order.
+///
+/// \tparam T the integer type that holds a function name pointer.
+/// \param ProfileNames the section that function name pointers refer into.
+/// \param Data the raw contents of the coverage mapping section.
+/// \param Records receives one entry per distinct function name pointer.
+/// \param Filenames receives the filenames of all translation units; each
+/// record refers to its own sub-range of this vector.
+/// \returns malformed for truncated or inconsistent data, unsupported_version
+/// for an unknown version, or the first error from a nested read.
+template <typename T>
+std::error_code readCoverageMappingData(
+ SectionData &ProfileNames, StringRef Data,
+ std::vector<ObjectFileCoverageMappingReader::ProfileMappingRecord> &Records,
+ std::vector<StringRef> &Filenames) {
+ // Function name pointers that were already seen, used to drop duplicates.
+ llvm::DenseSet<T> UniqueFunctionMappingData;
+
+ // Read the records in the coverage data section.
+ while (!Data.empty()) {
+ if (Data.size() < sizeof(CoverageMappingTURecord<T>))
+ return instrprof_error::malformed;
+ auto TU = reinterpret_cast<const CoverageMappingTURecord<T> *>(Data.data());
+ Data = Data.substr(sizeof(CoverageMappingTURecord<T>));
+ switch (TU->Version) {
+ case CoverageMappingVersion1:
+ break;
+ default:
+ return instrprof_error::unsupported_version;
+ }
+ auto Version = CoverageMappingVersion(TU->Version);
+
+ // Get the function records.
+ auto FunctionRecords =
+ reinterpret_cast<const CoverageMappingFunctionRecord<T> *>(Data.data());
+ // NOTE(review): where size_t is 32 bits wide this product could wrap before
+ // the comparison; confirm whether such hosts need to be supported.
+ if (Data.size() <
+ sizeof(CoverageMappingFunctionRecord<T>) * TU->FunctionRecordsSize)
+ return instrprof_error::malformed;
+ Data = Data.substr(sizeof(CoverageMappingFunctionRecord<T>) *
+ TU->FunctionRecordsSize);
+
+ // Get the filenames.
+ if (Data.size() < TU->FilenamesSize)
+ return instrprof_error::malformed;
+ auto RawFilenames = Data.substr(0, TU->FilenamesSize);
+ Data = Data.substr(TU->FilenamesSize);
+ // Remember where this translation unit's filenames start in Filenames.
+ size_t FilenamesBegin = Filenames.size();
+ RawCoverageFilenamesReader Reader(RawFilenames, Filenames);
+ if (auto Err = Reader.read())
+ return Err;
+
+ // Get the coverage mappings.
+ if (Data.size() < TU->CoverageMappingsSize)
+ return instrprof_error::malformed;
+ auto CoverageMappings = Data.substr(0, TU->CoverageMappingsSize);
+ Data = Data.substr(TU->CoverageMappingsSize);
+
+ for (unsigned I = 0; I < TU->FunctionRecordsSize; ++I) {
+ auto &MappingRecord = FunctionRecords[I];
+
+ // Get the coverage mapping.
+ if (CoverageMappings.size() < MappingRecord.CoverageMappingSize)
+ return instrprof_error::malformed;
+ auto Mapping =
+ CoverageMappings.substr(0, MappingRecord.CoverageMappingSize);
+ // The mapping is consumed even when the record turns out to be a duplicate,
+ // so the following records stay aligned.
+ CoverageMappings =
+ CoverageMappings.substr(MappingRecord.CoverageMappingSize);
+
+ // Ignore this record if we already have a record that points to the same
+ // function name.
+ // This is useful to ignore the redundant records for the functions
+ // with ODR linkage.
+ if (!UniqueFunctionMappingData.insert(MappingRecord.FunctionNamePtr)
+ .second)
+ continue;
+ StringRef FunctionName;
+ if (auto Err =
+ ProfileNames.get(MappingRecord.FunctionNamePtr,
+ MappingRecord.FunctionNameSize, FunctionName))
+ return Err;
+ Records.push_back(ObjectFileCoverageMappingReader::ProfileMappingRecord(
+ Version, FunctionName, MappingRecord.FunctionHash, Mapping,
+ FilenamesBegin, Filenames.size() - FilenamesBegin));
+ }
+ }
+
+ return instrprof_error::success;
+}
+
+ /// Magic string that marks a buffer as being in the testing format.
+ static const char *TestingFormatMagic = "llvmcovmtestdata";
+
+ /// \brief Split a testing-format buffer into its profile names and coverage
+ /// mapping parts.
+ ///
+ /// After the magic prefix the buffer holds two ULEB128 values (the size of the
+ /// profile names blob, then the value stored into ProfileNames.Address), the
+ /// names blob itself, and finally the coverage mapping data, which extends to
+ /// the end of the buffer. The magic itself is skipped without being verified
+ /// here.
+ ///
+ /// \returns instrprof_error::truncated when a ULEB128 field is missing,
+ /// instrprof_error::malformed when a field or the names blob runs past the
+ /// end of the buffer, and instrprof_error::success otherwise.
+ static std::error_code decodeTestingFormat(StringRef Data,
+                                            SectionData &ProfileNames,
+                                            StringRef &CoverageMapping) {
+   Data = Data.substr(StringRef(TestingFormatMagic).size());
+
+   // First field: the size of the profile names blob.
+   if (Data.empty())
+     return instrprof_error::truncated;
+   unsigned BytesRead = 0;
+   const uint8_t *Cursor = reinterpret_cast<const uint8_t *>(Data.data());
+   auto NamesSize = decodeULEB128(Cursor, &BytesRead);
+   if (BytesRead > Data.size())
+     return instrprof_error::malformed;
+   Data = Data.substr(BytesRead);
+
+   // Second field: the address recorded for the profile names.
+   if (Data.empty())
+     return instrprof_error::truncated;
+   BytesRead = 0;
+   Cursor = reinterpret_cast<const uint8_t *>(Data.data());
+   ProfileNames.Address = decodeULEB128(Cursor, &BytesRead);
+   if (BytesRead > Data.size())
+     return instrprof_error::malformed;
+   Data = Data.substr(BytesRead);
+
+   // The names blob comes next; everything after it is coverage mapping data.
+   if (NamesSize > Data.size())
+     return instrprof_error::malformed;
+   ProfileNames.Data = Data.substr(0, NamesSize);
+   CoverageMapping = Data.substr(NamesSize);
+   return instrprof_error::success;
+ }
+
+ /// \brief Build a reader over \p ObjectBuffer.
+ ///
+ /// A buffer that starts with the testing-format magic is decoded directly and
+ /// its mapping data is read immediately; any other buffer is handed to
+ /// createObjectFile. Failures are recorded through error().
+ ObjectFileCoverageMappingReader::ObjectFileCoverageMappingReader(
+     std::unique_ptr<MemoryBuffer> &ObjectBuffer, sys::fs::file_magic Type)
+     : CurrentRecord(0) {
+   if (!ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) {
+     // Regular path: parse the buffer as an object file.
+     auto File = object::ObjectFile::createObjectFile(
+         ObjectBuffer->getMemBufferRef(), Type);
+     if (!File) {
+       error(File.getError());
+       return;
+     }
+     Object = OwningBinary<ObjectFile>(std::move(File.get()),
+                                       std::move(ObjectBuffer));
+     return;
+   }
+
+   // This is a special format used for testing.
+   SectionData Names;
+   StringRef Mapping;
+   auto DecodeErr =
+       decodeTestingFormat(ObjectBuffer->getBuffer(), Names, Mapping);
+   if (DecodeErr) {
+     error(DecodeErr);
+     return;
+   }
+   error(readCoverageMappingData<uint64_t>(Names, Mapping, MappingRecords,
+                                           Filenames));
+   // No object file exists in this mode; only the buffer is taken over.
+   Object = OwningBinary<ObjectFile>(std::unique_ptr<ObjectFile>(),
+                                     std::move(ObjectBuffer));
+ }
+
+ /// \brief Locate the "__llvm_prf_names" and "__llvm_covmap" sections of the
+ /// object file and load every coverage mapping record from them.
+ ///
+ /// \returns the previously recorded error state when there is no object file
+ /// to inspect, instrprof_error::malformed for an address size other than 4 or
+ /// 8 bytes, instrprof_error::bad_header unless each of the two sections is
+ /// found exactly once, or the error produced while reading the sections.
+ std::error_code ObjectFileCoverageMappingReader::readHeader() {
+   const ObjectFile *OF = Object.getBinary();
+   if (!OF)
+     return getError();
+   auto BytesInAddress = OF->getBytesInAddress();
+   if (BytesInAddress != 4 && BytesInAddress != 8)
+     return error(instrprof_error::malformed);
+
+   // Look for the sections that we are interested in.
+   int FoundSectionCount = 0;
+   bool FoundProfileNames = false;
+   bool FoundCoverageMapping = false;
+   SectionRef ProfileNames, CoverageMapping;
+   for (const auto &Section : OF->sections()) {
+     StringRef Name;
+     if (auto Err = Section.getName(Name))
+       return error(Err);
+     if (Name == "__llvm_prf_names") {
+       ProfileNames = Section;
+       FoundProfileNames = true;
+     } else if (Name == "__llvm_covmap") {
+       CoverageMapping = Section;
+       FoundCoverageMapping = true;
+     } else
+       continue;
+     ++FoundSectionCount;
+   }
+   // The count alone is not sufficient: two sections sharing one of the names
+   // also add up to two while the other SectionRef stays default-constructed.
+   if (FoundSectionCount != 2 || !FoundProfileNames || !FoundCoverageMapping)
+     return error(instrprof_error::bad_header);
+
+   // Get the contents of the given sections.
+   StringRef Data;
+   if (auto Err = CoverageMapping.getContents(Data))
+     return error(Err);
+   SectionData ProfileNamesData;
+   if (auto Err = ProfileNamesData.load(ProfileNames))
+     return error(Err);
+
+   // Load the data from the found sections, using the pointer width of the
+   // object file to pick the record layout.
+   std::error_code Err;
+   if (BytesInAddress == 4)
+     Err = readCoverageMappingData<uint32_t>(ProfileNamesData, Data,
+                                             MappingRecords, Filenames);
+   else
+     Err = readCoverageMappingData<uint64_t>(ProfileNamesData, Data,
+                                             MappingRecords, Filenames);
+   if (Err)
+     return error(Err);
+
+   return success();
+ }
+
+ /// \brief Decode the mapping record at the current position into \p Record
+ /// and advance to the next one.
+ ///
+ /// \returns instrprof_error::eof once every record has been consumed, or the
+ /// error reported by the raw mapping reader, in which case the position is
+ /// not advanced.
+ std::error_code
+ ObjectFileCoverageMappingReader::readNextRecord(CoverageMappingRecord &Record) {
+   if (CurrentRecord >= MappingRecords.size())
+     return error(instrprof_error::eof);
+
+   // Empty the per-record buffers that are handed to the raw reader below.
+   FunctionsFilenames.clear();
+   Expressions.clear();
+   MappingRegions.clear();
+
+   auto &Current = MappingRecords[CurrentRecord];
+   // Expose only the slice of filenames that belongs to this record.
+   auto RecordFilenames = makeArrayRef(
+       Filenames.data() + Current.FilenamesBegin, Current.FilenamesSize);
+   RawCoverageMappingReader Reader(Current.FunctionName,
+                                   Current.CoverageMapping, RecordFilenames,
+                                   FunctionsFilenames, Expressions,
+                                   MappingRegions);
+   auto ReadErr = Reader.read(Record);
+   if (ReadErr)
+     return ReadErr;
+
+   Record.FunctionHash = Current.FunctionHash;
+   ++CurrentRecord;
+   return success();
+ }
diff --git a/lib/ProfileData/CoverageMappingWriter.cpp b/lib/ProfileData/CoverageMappingWriter.cpp
new file mode 100644
index 0000000..6969c2a
--- /dev/null
+++ b/lib/ProfileData/CoverageMappingWriter.cpp
@@ -0,0 +1,187 @@
+//=-- CoverageMappingWriter.cpp - Code coverage mapping writer -------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing coverage mapping data for
+// instrumentation based coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/CoverageMappingWriter.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+using namespace coverage;
+
+ /// \brief Emit the filenames section to \p OS: the number of filenames as a
+ /// ULEB128 value, then each filename as a ULEB128 length followed by its
+ /// characters.
+ void CoverageFilenamesSectionWriter::write(raw_ostream &OS) {
+   encodeULEB128(Filenames.size(), OS);
+   for (auto It = Filenames.begin(), End = Filenames.end(); It != End; ++It) {
+     const auto &Name = *It;
+     encodeULEB128(Name.size(), OS);
+     OS << Name;
+   }
+ }
+
+ namespace {
+ /// \brief Gather only the expressions that are used by the mapping
+ /// regions in this function.
+ ///
+ /// Construction makes two passes over the regions' counters: mark() flags
+ /// every expression reachable from a region, then gatherUsed() copies the
+ /// flagged expressions into a compact list and records each one's position in
+ /// it, so that adjust() can translate old expression ids into new ones.
+ class CounterExpressionsMinimizer {
+   ArrayRef<CounterExpression> Expressions;
+   llvm::SmallVector<CounterExpression, 16> UsedExpressions;
+   /// Indexed by original expression id. Holds a 0/1 "reachable" flag while
+   /// marking, and the expression's index in UsedExpressions after gathering.
+   std::vector<unsigned> AdjustedExpressionIDs;
+
+ public:
+   /// \brief Flag the expression referenced by \p C, and every expression it
+   /// refers to through its operands, as used. Non-expression counters are
+   /// ignored.
+   void mark(Counter C) {
+     if (!C.isExpression())
+       return;
+     unsigned ID = C.getExpressionID();
+     // An already flagged expression has had (or is having) its operands
+     // walked; repeating that walk for every additional reference would be
+     // redundant work when sub-expressions are shared.
+     if (AdjustedExpressionIDs[ID])
+       return;
+     AdjustedExpressionIDs[ID] = 1;
+     mark(Expressions[ID].LHS);
+     mark(Expressions[ID].RHS);
+   }
+
+   /// \brief Append the expression referenced by \p C to the used list,
+   /// record its new index, then do the same for its operands.
+   ///
+   /// Counters that are not expressions, or whose entry in
+   /// AdjustedExpressionIDs is zero, are skipped. An expression reached again
+   /// while its recorded index is non-zero is appended again and the recorded
+   /// index moves to the newest copy.
+   void gatherUsed(Counter C) {
+     if (!C.isExpression() || !AdjustedExpressionIDs[C.getExpressionID()])
+       return;
+     AdjustedExpressionIDs[C.getExpressionID()] = UsedExpressions.size();
+     const auto &E = Expressions[C.getExpressionID()];
+     UsedExpressions.push_back(E);
+     gatherUsed(E.LHS);
+     gatherUsed(E.RHS);
+   }
+
+   /// \brief Compute the minimized expression list for \p MappingRegions.
+   ///
+   /// All regions are marked before any is gathered, so that marking only ever
+   /// sees the 0/1 flags.
+   CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions,
+                               ArrayRef<CounterMappingRegion> MappingRegions)
+       : Expressions(Expressions) {
+     AdjustedExpressionIDs.resize(Expressions.size(), 0);
+     for (const auto &I : MappingRegions)
+       mark(I.Count);
+     for (const auto &I : MappingRegions)
+       gatherUsed(I.Count);
+   }
+
+   /// \brief The gathered expressions. Their operands still carry the original
+   /// expression ids; pass them through adjust() before use.
+   ArrayRef<CounterExpression> getExpressions() const { return UsedExpressions; }
+
+   /// \brief Adjust the given counter to correctly transition from the old
+   /// expression ids to the new expression ids.
+   Counter adjust(Counter C) const {
+     if (C.isExpression())
+       C = Counter::getExpression(AdjustedExpressionIDs[C.getExpressionID()]);
+     return C;
+   }
+ };
+ }
+
+ /// \brief Pack a counter into a single unsigned value.
+ ///
+ /// Layout of the result:
+ ///   Low Counter::EncodingTagBits bits - tag:
+ ///     Counter::Zero(0)                  - a counter of kind Counter::Zero
+ ///     Counter::CounterValueReference(1) - a counter of kind
+ ///                                         Counter::CounterValueReference
+ ///     Counter::Expression(2) + CounterExpression::Subtract(0) -
+ ///         a counter of kind Counter::Expression whose expression has kind
+ ///         CounterExpression::Subtract
+ ///     Counter::Expression(2) + CounterExpression::Add(1) -
+ ///         a counter of kind Counter::Expression whose expression has kind
+ ///         CounterExpression::Add
+ ///   Remaining bits - the counter/expression id.
+ ///
+ /// \p Expressions is consulted only for expression counters, to look up the
+ /// kind of the referenced expression.
+ static unsigned encodeCounter(ArrayRef<CounterExpression> Expressions,
+                               Counter C) {
+   const unsigned ID = C.getCounterID();
+   // The id must still fit once it is shifted past the tag bits.
+   assert(ID <=
+          (std::numeric_limits<unsigned>::max() >> Counter::EncodingTagBits));
+   const unsigned Payload = ID << Counter::EncodingTagBits;
+   const unsigned Tag =
+       C.isExpression()
+           ? unsigned(C.getKind()) + Expressions[C.getExpressionID()].Kind
+           : unsigned(C.getKind());
+   return Payload | Tag;
+ }
+
+ /// \brief Encode \p C with encodeCounter() and write the result to \p OS as a
+ /// ULEB128 value.
+ static void writeCounter(ArrayRef<CounterExpression> Expressions, Counter C,
+                          raw_ostream &OS) {
+   const unsigned Encoded = encodeCounter(Expressions, C);
+   encodeULEB128(Encoded, OS);
+ }
+
+ /// \brief Serialize this function's coverage mapping to \p OS.
+ ///
+ /// The output consists of, in order: the virtual file mapping (count, then
+ /// each file id), the minimized expression list (count, then the LHS and RHS
+ /// counter of each expression), and the mapping regions grouped by file id,
+ /// each group prefixed by its region count. Every integer is ULEB128 encoded.
+ /// MappingRegions is sorted in place as a side effect.
+ void CoverageMappingWriter::write(raw_ostream &OS) {
+   // Sort the regions in an ascending order by the file id and the starting
+   // location.
+   std::sort(MappingRegions.begin(), MappingRegions.end());
+
+   // Write out the fileid -> filename mapping.
+   encodeULEB128(VirtualFileMapping.size(), OS);
+   for (const auto &FileID : VirtualFileMapping)
+     encodeULEB128(FileID, OS);
+
+   // Write out the expressions.
+   CounterExpressionsMinimizer Minimizer(Expressions, MappingRegions);
+   auto MinExpressions = Minimizer.getExpressions();
+   encodeULEB128(MinExpressions.size(), OS);
+   for (const auto &E : MinExpressions) {
+     writeCounter(MinExpressions, Minimizer.adjust(E.LHS), OS);
+     writeCounter(MinExpressions, Minimizer.adjust(E.RHS), OS);
+   }
+
+   // Write out the mapping regions.
+   // Split the regions into subarrays where each region in a
+   // subarray has a fileID which is the index of that subarray.
+   unsigned PrevLineStart = 0;
+   // ~0U is the "no file yet" sentinel; adding one wraps it to file id 0.
+   unsigned CurrentFileID = ~0U;
+   for (auto I = MappingRegions.begin(), E = MappingRegions.end(); I != E; ++I) {
+     if (I->FileID != CurrentFileID) {
+       // Ensure that all file ids have at least one mapping region.
+       assert(I->FileID == (CurrentFileID + 1));
+       // Find the number of regions with this file id.
+       unsigned RegionCount = 1;
+       for (auto J = I + 1; J != E && I->FileID == J->FileID; ++J)
+         ++RegionCount;
+       // Start a new region sub-array.
+       encodeULEB128(RegionCount, OS);
+
+       CurrentFileID = I->FileID;
+       // Line numbers are delta encoded within each file's sub-array.
+       PrevLineStart = 0;
+     }
+     Counter Count = Minimizer.adjust(I->Count);
+     switch (I->Kind) {
+     case CounterMappingRegion::CodeRegion:
+       writeCounter(MinExpressions, Count, OS);
+       break;
+     case CounterMappingRegion::ExpansionRegion: {
+       assert(Count.isZero());
+       assert(I->ExpandedFileID <=
+              (std::numeric_limits<unsigned>::max() >>
+               Counter::EncodingCounterTagAndExpansionRegionTagBits));
+       // Mark an expansion region with a set bit that follows the counter tag,
+       // and pack the expanded file id into the remaining bits.
+       unsigned EncodedTagExpandedFileID =
+           (1 << Counter::EncodingTagBits) |
+           (I->ExpandedFileID
+            << Counter::EncodingCounterTagAndExpansionRegionTagBits);
+       encodeULEB128(EncodedTagExpandedFileID, OS);
+       break;
+     }
+     case CounterMappingRegion::SkippedRegion:
+       assert(Count.isZero());
+       encodeULEB128(unsigned(I->Kind)
+                         << Counter::EncodingCounterTagAndExpansionRegionTagBits,
+                     OS);
+       break;
+     }
+     assert(I->LineStart >= PrevLineStart);
+     encodeULEB128(I->LineStart - PrevLineStart, OS);
+     // The has-code-before flag takes the low bits; the start column sits
+     // above it.
+     uint64_t CodeBeforeColumnStart =
+         uint64_t(I->HasCodeBefore) |
+         (uint64_t(I->ColumnStart)
+          << CounterMappingRegion::EncodingHasCodeBeforeBits);
+     encodeULEB128(CodeBeforeColumnStart, OS);
+     assert(I->LineEnd >= I->LineStart);
+     encodeULEB128(I->LineEnd - I->LineStart, OS);
+     encodeULEB128(I->ColumnEnd, OS);
+     PrevLineStart = I->LineStart;
+   }
+   // Ensure that all file ids have at least one mapping region.
+   // Phrased as CurrentFileID + 1 so the check also holds when there are no
+   // regions and no files: the ~0U sentinel wraps to 0 in unsigned arithmetic,
+   // whereas comparing it against size() - 1 fails wherever size_t is wider
+   // than unsigned.
+   assert(CurrentFileID + 1 == VirtualFileMapping.size());
+ }
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 0121222..900dff9 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -14,6 +14,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -55,7 +56,8 @@ class InstrProfErrorCategoryType : public std::error_category {
};
}
+static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory;
+
const std::error_category &llvm::instrprof_category() {
- static InstrProfErrorCategoryType C;
- return C;
+ return *ErrorCategory;
}
diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h
index 7761704..c2bc46c 100644
--- a/lib/ProfileData/InstrProfIndexed.h
+++ b/lib/ProfileData/InstrProfIndexed.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
-#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
+#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
@@ -46,10 +46,10 @@ static inline uint64_t ComputeHash(HashT Type, StringRef K) {
}
const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
-const uint64_t Version = 1;
+const uint64_t Version = 2;
const HashT HashType = HashT::MD5;
}
} // end namespace llvm
-#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#endif
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index 0b36728..0160a64 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -21,32 +21,34 @@
using namespace llvm;
-static std::error_code
-setupMemoryBuffer(std::string Path, std::unique_ptr<MemoryBuffer> &Buffer) {
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+setupMemoryBuffer(std::string Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFileOrSTDIN(Path);
if (std::error_code EC = BufferOrErr.getError())
return EC;
- Buffer = std::move(BufferOrErr.get());
+ auto Buffer = std::move(BufferOrErr.get());
// Sanity check the file.
if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
return instrprof_error::too_large;
- return instrprof_error::success;
+ return std::move(Buffer);
}
static std::error_code initializeReader(InstrProfReader &Reader) {
return Reader.readHeader();
}
-std::error_code
-InstrProfReader::create(std::string Path,
- std::unique_ptr<InstrProfReader> &Result) {
+ErrorOr<std::unique_ptr<InstrProfReader>>
+InstrProfReader::create(std::string Path) {
// Set up the buffer to read.
- std::unique_ptr<MemoryBuffer> Buffer;
- if (std::error_code EC = setupMemoryBuffer(Path, Buffer))
+ auto BufferOrError = setupMemoryBuffer(Path);
+ if (std::error_code EC = BufferOrError.getError())
return EC;
+ auto Buffer = std::move(BufferOrError.get());
+ std::unique_ptr<InstrProfReader> Result;
+
// Create the reader.
if (IndexedInstrProfReader::hasFormat(*Buffer))
Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
@@ -58,16 +60,20 @@ InstrProfReader::create(std::string Path,
Result.reset(new TextInstrProfReader(std::move(Buffer)));
// Initialize the reader and return the result.
- return initializeReader(*Result);
+ if (std::error_code EC = initializeReader(*Result))
+ return EC;
+
+ return std::move(Result);
}
std::error_code IndexedInstrProfReader::create(
std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
// Set up the buffer to read.
- std::unique_ptr<MemoryBuffer> Buffer;
- if (std::error_code EC = setupMemoryBuffer(Path, Buffer))
+ auto BufferOrError = setupMemoryBuffer(Path);
+ if (std::error_code EC = BufferOrError.getError())
return EC;
+ auto Buffer = std::move(BufferOrError.get());
// Create the reader.
if (!IndexedInstrProfReader::hasFormat(*Buffer))
return instrprof_error::bad_magic;
@@ -83,8 +89,8 @@ void InstrProfIterator::Increment() {
}
std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
- // Skip empty lines.
- while (!Line.is_at_end() && Line->empty())
+ // Skip empty lines and comments.
+ while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
++Line;
// If we hit EOF while looking for a name, we're done.
if (Line.is_at_end())
@@ -190,6 +196,9 @@ RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
// garbage at the end of the file.
if (CurrentPos + sizeof(RawHeader) > End)
return instrprof_error::malformed;
+ // The writer ensures each profile is padded to start at an aligned address.
+ if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())
+ return instrprof_error::malformed;
// The magic should have the same byte order as in the previous header.
uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
if (Magic != swap(getRawMagic<IntPtrT>()))
@@ -307,8 +316,8 @@ std::error_code IndexedInstrProfReader::readHeader() {
return error(instrprof_error::bad_magic);
// Read the version.
- uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
- if (Version != IndexedInstrProf::Version)
+ FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur);
+ if (FormatVersion > IndexedInstrProf::Version)
return error(instrprof_error::unsupported_version);
// Read the maximal function count.
@@ -331,18 +340,31 @@ std::error_code IndexedInstrProfReader::readHeader() {
}
std::error_code IndexedInstrProfReader::getFunctionCounts(
- StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
- const auto &Iter = Index->find(FuncName);
+ StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) {
+ auto Iter = Index->find(FuncName);
if (Iter == Index->end())
return error(instrprof_error::unknown_function);
- // Found it. Make sure it's valid before giving back a result.
- const InstrProfRecord &Record = *Iter;
- if (Record.Name.empty())
- return error(instrprof_error::malformed);
- FuncHash = Record.Hash;
- Counts = Record.Counts;
- return success();
+ // Found it. Look for counters with the right hash.
+ ArrayRef<uint64_t> Data = (*Iter).Data;
+ uint64_t NumCounts;
+ for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
+ // The function hash comes first.
+ uint64_t FoundHash = Data[I++];
+ // In v1, we have at least one count. Later, we have the number of counts.
+ if (I == E)
+ return error(instrprof_error::malformed);
+ NumCounts = FormatVersion == 1 ? E - I : Data[I++];
+ // If we have more counts than data, this is bogus.
+ if (I + NumCounts > E)
+ return error(instrprof_error::malformed);
+ // Check for a match and fill the vector if there is one.
+ if (FoundHash == FuncHash) {
+ Counts = Data.slice(I, NumCounts);
+ return success();
+ }
+ }
+ return error(instrprof_error::hash_mismatch);
}
std::error_code
@@ -351,10 +373,30 @@ IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
if (RecordIterator == Index->data_end())
return error(instrprof_error::eof);
- // Read the next one.
- Record = *RecordIterator;
- ++RecordIterator;
- if (Record.Name.empty())
+ // Record the current function name.
+ Record.Name = (*RecordIterator).Name;
+
+ ArrayRef<uint64_t> Data = (*RecordIterator).Data;
+ // Valid data starts with a hash and either a count or the number of counts.
+ if (CurrentOffset + 1 > Data.size())
return error(instrprof_error::malformed);
+ // First we have a function hash.
+ Record.Hash = Data[CurrentOffset++];
+ // In version 1 we knew the number of counters implicitly, but in newer
+ // versions we store the number of counters next.
+ uint64_t NumCounts =
+ FormatVersion == 1 ? Data.size() - CurrentOffset : Data[CurrentOffset++];
+ if (CurrentOffset + NumCounts > Data.size())
+ return error(instrprof_error::malformed);
+ // And finally the counts themselves.
+ Record.Counts = Data.slice(CurrentOffset, NumCounts);
+
+ // If we've exhausted this function's data, increment the record.
+ CurrentOffset += NumCounts;
+ if (CurrentOffset == Data.size()) {
+ ++RecordIterator;
+ CurrentOffset = 0;
+ }
+
return success();
}
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index e55c299..ad1b876 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -45,7 +45,9 @@ public:
offset_type N = K.size();
LE.write<offset_type>(N);
- offset_type M = (1 + V->Counts.size()) * sizeof(uint64_t);
+ offset_type M = 0;
+ for (const auto &Counts : *V)
+ M += (2 + Counts.second.size()) * sizeof(uint64_t);
LE.write<offset_type>(M);
return std::make_pair(N, M);
@@ -59,9 +61,13 @@ public:
offset_type) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
- LE.write<uint64_t>(V->Hash);
- for (uint64_t I : V->Counts)
- LE.write<uint64_t>(I);
+
+ for (const auto &Counts : *V) {
+ LE.write<uint64_t>(Counts.first);
+ LE.write<uint64_t>(Counts.second.size());
+ for (uint64_t I : Counts.second)
+ LE.write<uint64_t>(I);
+ }
}
};
}
@@ -70,41 +76,43 @@ std::error_code
InstrProfWriter::addFunctionCounts(StringRef FunctionName,
uint64_t FunctionHash,
ArrayRef<uint64_t> Counters) {
- auto Where = FunctionData.find(FunctionName);
- if (Where == FunctionData.end()) {
- // If this is the first time we've seen this function, just add it.
- auto &Data = FunctionData[FunctionName];
- Data.Hash = FunctionHash;
- Data.Counts = Counters;
+ auto &CounterData = FunctionData[FunctionName];
+
+ auto Where = CounterData.find(FunctionHash);
+ if (Where == CounterData.end()) {
+ // We've never seen a function with this name and hash, add it.
+ CounterData[FunctionHash] = Counters;
+ // We keep track of the max function count as we go for simplicity.
+ if (Counters[0] > MaxFunctionCount)
+ MaxFunctionCount = Counters[0];
return instrprof_error::success;
}
- auto &Data = Where->getValue();
- // We can only add to existing functions if they match, so we check the hash
- // and number of counters.
- if (Data.Hash != FunctionHash)
- return instrprof_error::hash_mismatch;
- if (Data.Counts.size() != Counters.size())
+ // We're updating a function we've seen before.
+ auto &FoundCounters = Where->second;
+ // If the number of counters doesn't match we either have bad data or a hash
+ // collision.
+ if (FoundCounters.size() != Counters.size())
return instrprof_error::count_mismatch;
- // These match, add up the counters.
+
for (size_t I = 0, E = Counters.size(); I < E; ++I) {
- if (Data.Counts[I] + Counters[I] < Data.Counts[I])
+ if (FoundCounters[I] + Counters[I] < FoundCounters[I])
return instrprof_error::counter_overflow;
- Data.Counts[I] += Counters[I];
+ FoundCounters[I] += Counters[I];
}
+ // We keep track of the max function count as we go for simplicity.
+ if (FoundCounters[0] > MaxFunctionCount)
+ MaxFunctionCount = FoundCounters[0];
+
return instrprof_error::success;
}
void InstrProfWriter::write(raw_fd_ostream &OS) {
OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
- uint64_t MaxFunctionCount = 0;
// Populate the hash table generator.
- for (const auto &I : FunctionData) {
+ for (const auto &I : FunctionData)
Generator.insert(I.getKey(), &I.getValue());
- if (I.getValue().Counts[0] > MaxFunctionCount)
- MaxFunctionCount = I.getValue().Counts[0];
- }
using namespace llvm::support;
endian::Writer<little> LE(OS);
diff --git a/lib/ProfileData/LLVMBuild.txt b/lib/ProfileData/LLVMBuild.txt
index 0a8cbe3..a7f471f 100644
--- a/lib/ProfileData/LLVMBuild.txt
+++ b/lib/ProfileData/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = ProfileData
parent = Libraries
-required_libraries = Support
+required_libraries = Core Support Object
diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp
new file mode 100644
index 0000000..920c48a
--- /dev/null
+++ b/lib/ProfileData/SampleProf.cpp
@@ -0,0 +1,51 @@
+//=-- SampleProf.cpp - Sample profiling format support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common definitions used in the reading and writing of
+// sample profile data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+ namespace {
+ /// \brief Error category that supplies the name and the human-readable
+ /// messages for sampleprof_error values.
+ class SampleProfErrorCategoryType : public std::error_category {
+   /// Name reported for this category.
+   const char *name() const LLVM_NOEXCEPT override {
+     return "llvm.sampleprof";
+   }
+
+   /// Translate the integer form of a sampleprof_error into its message.
+   /// Every enumerator handled below returns from inside the switch, so
+   /// falling out of it is treated as unreachable.
+   std::string message(int IE) const override {
+     switch (static_cast<sampleprof_error>(IE)) {
+     case sampleprof_error::success:
+       return "Success";
+     case sampleprof_error::bad_magic:
+       return "Invalid file format (bad magic)";
+     case sampleprof_error::unsupported_version:
+       return "Unsupported format version";
+     case sampleprof_error::too_large:
+       return "Too much profile data";
+     case sampleprof_error::truncated:
+       return "Truncated profile data";
+     case sampleprof_error::malformed:
+       return "Malformed profile data";
+     case sampleprof_error::unrecognized_format:
+       return "Unrecognized profile encoding format";
+     }
+     llvm_unreachable("A value of sampleprof_error has no message.");
+   }
+ };
+ }
+
+ /// The category object handed out by sampleprof_category().
+ static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory;
+
+ /// \brief Return the error category used for sampleprof_error values.
+ const std::error_category &llvm::sampleprof_category() {
+   const std::error_category &Category = *ErrorCategory;
+   return Category;
+ }
diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp
new file mode 100644
index 0000000..b39bfd6
--- /dev/null
+++ b/lib/ProfileData/SampleProfReader.cpp
@@ -0,0 +1,399 @@
+//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that reads LLVM sample profiles. It
+// supports two file formats: text and binary. The textual representation
+// is useful for debugging and testing purposes. The binary representation
+// is more compact, resulting in smaller file sizes. However, they can
+// both be used interchangeably.
+//
+// NOTE: If you are making changes to the file format, please remember
+// to document them in the Clang documentation at
+// tools/clang/docs/UsersManual.rst.
+//
+// Text format
+// -----------
+//
+// Sample profiles are written as ASCII text. The file is divided into
+// sections, which correspond to each of the functions executed at runtime.
+// Each section has the following format
+//
+// function1:total_samples:total_head_samples
+// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
+// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
+// ...
+// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
+//
+// The file may contain blank lines between sections and within a
+// section. However, the spacing within a single line is fixed. Additional
+// spaces will result in an error while reading the file.
+//
+// Function names must be mangled in order for the profile loader to
+// match them in the current translation unit. The two numbers in the
+// function header specify how many total samples were accumulated in the
+// function (first number), and the total number of samples accumulated
+// in the prologue of the function (second number). This head sample
+// count provides an indicator of how frequently the function is invoked.
+//
+// Each sampled line may contain several items. Some are optional (marked
+// below):
+//
+// a. Source line offset. This number represents the line number
+// in the function where the sample was collected. The line number is
+// always relative to the line where symbol of the function is
+// defined. So, if the function has its header at line 280, the offset
+// 13 is at line 293 in the file.
+//
+// Note that this offset should never be a negative number. This could
+// happen in cases like macros. The debug machinery will register the
+// line number at the point of macro expansion. So, if the macro was
+// expanded in a line before the start of the function, the profile
+// converter should emit a 0 as the offset (this means that the optimizers
+// will not be able to associate a meaningful weight to the instructions
+// in the macro).
+//
+// b. [OPTIONAL] Discriminator. This is used if the sampled program
+// was compiled with DWARF discriminator support
+// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
+// DWARF discriminators are unsigned integer values that allow the
+// compiler to distinguish between multiple execution paths on the
+// same source line location.
+//
+// For example, consider the line of code ``if (cond) foo(); else bar();``.
+// If the predicate ``cond`` is true 80% of the time, then the edge
+// into function ``foo`` should be considered to be taken most of the
+// time. But both calls to ``foo`` and ``bar`` are at the same source
+// line, so a sample count at that line is not sufficient. The
+// compiler needs to know which part of that line is taken more
+// frequently.
+//
+// This is what discriminators provide. In this case, the calls to
+// ``foo`` and ``bar`` will be at the same line, but will have
+// different discriminator values. This allows the compiler to correctly
+// set edge weights into ``foo`` and ``bar``.
+//
+// c. Number of samples. This is an integer quantity representing the
+// number of samples collected by the profiler at this source
+// location.
+//
+// d. [OPTIONAL] Potential call targets and samples. If present, this
+// line contains a call instruction. This models both direct and
+// indirect calls. Each called target is listed together with the
+// number of samples. For example,
+//
+// 130: 7 foo:3 bar:2 baz:7
+//
+// The above means that at relative line offset 130 there is a call
+// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
+// with ``baz()`` being the relatively more frequently called target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+
+using namespace llvm::sampleprof;
+using namespace llvm;
+
+/// \brief Emit a human-readable summary of this function's samples.
+///
+/// The first line holds the total sample count, the head sample count and
+/// the number of sampled lines. One tab-indented line follows for every
+/// sampled location, including its call targets when there are any.
+///
+/// \param OS Stream to emit the output to.
+void FunctionSamples::print(raw_ostream &OS) {
+  OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
+     << " sampled lines\n";
+
+  for (const auto &Entry : BodySamples) {
+    const LineLocation Where = Entry.first;
+    const SampleRecord &Record = Entry.second;
+
+    OS << "\tline offset: " << Where.LineOffset;
+    OS << ", discriminator: " << Where.Discriminator;
+    OS << ", number of samples: " << Record.getSamples();
+
+    if (Record.hasCalls()) {
+      OS << ", calls:";
+      for (const auto &Target : Record.getCallTargets())
+        OS << " " << Target.first() << ":" << Target.second;
+    }
+    OS << "\n";
+  }
+
+  OS << "\n";
+}
+
+/// \brief Print the profile recorded for \p FName, preceded by its name.
+///
+/// \param FName Name of the function to print.
+/// \param OS Stream to emit the output to.
+void SampleProfileReader::dumpFunctionProfile(StringRef FName,
+                                              raw_ostream &OS) {
+  OS << "Function: ";
+  OS << FName;
+  OS << ": ";
+  // NOTE(review): the profile is looked up with operator[]; presumably this
+  // creates an empty entry when FName is unknown -- confirm against the
+  // container type of Profiles.
+  FunctionSamples &Samples = Profiles[FName];
+  Samples.print(OS);
+}
+
+/// \brief Print every function profile held by this reader on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+void SampleProfileReader::dump(raw_ostream &OS) {
+  for (auto It = Profiles.begin(), Last = Profiles.end(); It != Last; ++It)
+    dumpFunctionProfile(It->getKey(), OS);
+}
+
+/// \brief Load samples from a text file.
+///
+/// See the documentation at the top of the file for an explanation of
+/// the expected format.
+///
+/// \returns sampleprof_error::success if the whole buffer was parsed, or
+/// sampleprof_error::malformed (after reporting the offending line) if a
+/// line does not match the expected syntax or holds a number that does not
+/// fit in an unsigned value.
+std::error_code SampleProfileReaderText::read() {
+  line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
+
+  // Patterns for the function header, a sampled line and a single
+  // ' callee:count' entry at the end of a sampled line.
+  Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$");
+  Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$");
+  Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)");
+  while (!LineIt.is_at_eof()) {
+    // Read the header of each function.
+    //
+    // Note that for function identifiers we are actually expecting
+    // mangled names, but we may not always get them. This happens when
+    // the compiler decides not to emit the function (e.g., it was inlined
+    // and removed). In this case, the binary will not have the linkage
+    // name for the function, so the profiler will emit the function's
+    // unmangled name, which may contain characters like ':' and '>' in its
+    // name (member functions, templates, etc).
+    //
+    // The only requirement we place on the identifier, then, is that it
+    // should not begin with a number.
+    SmallVector<StringRef, 4> Matches;
+    if (!HeadRE.match(*LineIt, &Matches)) {
+      reportParseError(LineIt.line_number(),
+                       "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
+      return sampleprof_error::malformed;
+    }
+    assert(Matches.size() == 4);
+    StringRef FName = Matches[1];
+
+    // getAsInteger() returns true on failure. The regex only guarantees
+    // digits, not that the value fits, so an oversized count must be
+    // rejected instead of leaving the variable uninitialized.
+    unsigned NumSamples, NumHeadSamples;
+    if (Matches[2].getAsInteger(10, NumSamples) ||
+        Matches[3].getAsInteger(10, NumHeadSamples)) {
+      reportParseError(LineIt.line_number(),
+                       "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
+      return sampleprof_error::malformed;
+    }
+
+    // NOTE(review): a header seen a second time for the same function
+    // replaces the previously read profile instead of adding to it --
+    // confirm that this is intended.
+    Profiles[FName] = FunctionSamples();
+    FunctionSamples &FProfile = Profiles[FName];
+    FProfile.addTotalSamples(NumSamples);
+    FProfile.addHeadSamples(NumHeadSamples);
+    ++LineIt;
+
+    // Now read the body. The body of the function ends when we reach
+    // EOF or when we see the start of the next function. The character is
+    // converted to unsigned char first because isdigit() is undefined for
+    // negative values.
+    while (!LineIt.is_at_eof() &&
+           isdigit(static_cast<unsigned char>((*LineIt)[0]))) {
+      if (!LineSampleRE.match(*LineIt, &Matches)) {
+        reportParseError(
+            LineIt.line_number(),
+            "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt);
+        return sampleprof_error::malformed;
+      }
+      assert(Matches.size() == 5);
+
+      // The discriminator is optional and defaults to 0 when absent.
+      unsigned LineOffset, NumSamples, Discriminator = 0;
+      bool BadNumber = Matches[1].getAsInteger(10, LineOffset) ||
+                       Matches[3].getAsInteger(10, NumSamples);
+      if (!BadNumber && Matches[2] != "")
+        BadNumber = Matches[2].getAsInteger(10, Discriminator);
+      if (BadNumber) {
+        reportParseError(
+            LineIt.line_number(),
+            "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt);
+        return sampleprof_error::malformed;
+      }
+
+      // If there are function calls in this line, generate a call sample
+      // entry for each call. Every iteration consumes the first
+      // ' callee:count' entry of the remaining text.
+      std::string CallsLine(Matches[4]);
+      while (CallsLine != "") {
+        SmallVector<StringRef, 3> CallSample;
+        if (!CallSampleRE.match(CallsLine, &CallSample)) {
+          reportParseError(LineIt.line_number(),
+                           "Expected 'mangled_name:NUM', found " + CallsLine);
+          return sampleprof_error::malformed;
+        }
+        StringRef CalledFunction = CallSample[1];
+        unsigned CalledFunctionSamples;
+        if (CallSample[2].getAsInteger(10, CalledFunctionSamples)) {
+          reportParseError(LineIt.line_number(),
+                           "Expected 'mangled_name:NUM', found " + CallsLine);
+          return sampleprof_error::malformed;
+        }
+        FProfile.addCalledTargetSamples(LineOffset, Discriminator,
+                                        CalledFunction, CalledFunctionSamples);
+        // CalledFunction points into CallsLine, so the text is only
+        // rewritten after the entry has been recorded.
+        CallsLine = CallSampleRE.sub("", CallsLine);
+      }
+
+      FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
+      ++LineIt;
+    }
+  }
+
+  return sampleprof_error::success;
+}
+
+/// \brief Decode one ULEB128 value at the current position and advance.
+///
+/// \returns the value converted to \p T, sampleprof_error::malformed if it
+/// exceeds the largest value of \p T, or sampleprof_error::truncated if the
+/// encoding extends beyond the end of the data. Errors are reported through
+/// reportParseError() and leave the read position untouched.
+template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
+  unsigned Length = 0;
+  const uint64_t Decoded = decodeULEB128(Data, &Length);
+
+  std::error_code Status = sampleprof_error::success;
+  if (Decoded > std::numeric_limits<T>::max())
+    Status = sampleprof_error::malformed;
+  else if (Data + Length > End)
+    Status = sampleprof_error::truncated;
+
+  if (Status) {
+    reportParseError(0, Status.message());
+    return Status;
+  }
+
+  Data += Length;
+  return static_cast<T>(Decoded);
+}
+
+/// \brief Read a NUL-terminated string at the current position and advance
+/// past its terminator.
+///
+/// The terminator is searched for inside [Data, End) only, so the scan never
+/// looks at bytes beyond the end of the data and does not depend on the
+/// underlying buffer being NUL-terminated.
+///
+/// \returns the string (without the terminator), or
+/// sampleprof_error::truncated, after reporting it, if no terminator is found
+/// before End.
+ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
+  const uint8_t *Terminator = Data;
+  while (Terminator != End && *Terminator != 0)
+    ++Terminator;
+
+  if (Terminator == End) {
+    std::error_code EC = sampleprof_error::truncated;
+    reportParseError(0, EC.message());
+    return EC;
+  }
+
+  StringRef Str(reinterpret_cast<const char *>(Data), Terminator - Data);
+  Data = Terminator + 1;
+  return Str;
+}
+
+/// \brief Load every function profile from the binary data.
+///
+/// Each function is stored as: name, total samples, head samples and the
+/// number of body records. Each body record holds a line offset, a
+/// discriminator, a sample count and the number of call targets, followed by
+/// one (callee name, sample count) pair per call target.
+///
+/// \returns sampleprof_error::success, or the first error produced while
+/// reading a string or a number.
+std::error_code SampleProfileReaderBinary::read() {
+  while (!at_eof()) {
+    ErrorOr<StringRef> FName = readString();
+    if (std::error_code EC = FName.getError())
+      return EC;
+
+    // Start from an empty profile for this function name.
+    FunctionSamples &FProfile = Profiles[*FName];
+    FProfile = FunctionSamples();
+
+    ErrorOr<unsigned> Total = readNumber<unsigned>();
+    if (std::error_code EC = Total.getError())
+      return EC;
+    FProfile.addTotalSamples(*Total);
+
+    ErrorOr<unsigned> Head = readNumber<unsigned>();
+    if (std::error_code EC = Head.getError())
+      return EC;
+    FProfile.addHeadSamples(*Head);
+
+    // Read the samples in the body.
+    ErrorOr<unsigned> RecordCount = readNumber<unsigned>();
+    if (std::error_code EC = RecordCount.getError())
+      return EC;
+
+    for (unsigned Record = 0; Record != *RecordCount; ++Record) {
+      ErrorOr<uint64_t> LineOffset = readNumber<uint64_t>();
+      if (std::error_code EC = LineOffset.getError())
+        return EC;
+
+      ErrorOr<uint64_t> Discriminator = readNumber<uint64_t>();
+      if (std::error_code EC = Discriminator.getError())
+        return EC;
+
+      ErrorOr<uint64_t> NumSamples = readNumber<uint64_t>();
+      if (std::error_code EC = NumSamples.getError())
+        return EC;
+
+      ErrorOr<unsigned> CallCount = readNumber<unsigned>();
+      if (std::error_code EC = CallCount.getError())
+        return EC;
+
+      // The call targets of the record precede the update of its own
+      // sample count.
+      for (unsigned Call = 0; Call != *CallCount; ++Call) {
+        ErrorOr<StringRef> Callee = readString();
+        if (std::error_code EC = Callee.getError())
+          return EC;
+
+        ErrorOr<uint64_t> CalleeSamples = readNumber<uint64_t>();
+        if (std::error_code EC = CalleeSamples.getError())
+          return EC;
+
+        FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, *Callee,
+                                        *CalleeSamples);
+      }
+
+      FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
+    }
+  }
+
+  return sampleprof_error::success;
+}
+
+/// \brief Position the reader at the start of the buffer and validate the
+/// file header.
+///
+/// \returns sampleprof_error::bad_magic if the first number is not SPMagic(),
+/// sampleprof_error::unsupported_version if the second is not SPVersion(),
+/// any error produced while reading either number, and
+/// sampleprof_error::success otherwise.
+std::error_code SampleProfileReaderBinary::readHeader() {
+  const char *Start = Buffer->getBufferStart();
+  Data = reinterpret_cast<const uint8_t *>(Start);
+  End = Data + Buffer->getBufferSize();
+
+  // Read and check the magic identifier.
+  ErrorOr<uint64_t> Magic = readNumber<uint64_t>();
+  if (std::error_code EC = Magic.getError())
+    return EC;
+  if (*Magic != SPMagic())
+    return sampleprof_error::bad_magic;
+
+  // Read the version number.
+  ErrorOr<uint64_t> Version = readNumber<uint64_t>();
+  if (std::error_code EC = Version.getError())
+    return EC;
+  if (*Version != SPVersion())
+    return sampleprof_error::unsupported_version;
+
+  return sampleprof_error::success;
+}
+
+/// \brief Check whether \p Buffer starts with the binary profile magic.
+///
+/// \returns true if the ULEB128 value decoded at the start of \p Buffer
+/// equals SPMagic().
+bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) {
+  const char *Start = Buffer.getBufferStart();
+  const uint64_t Magic = decodeULEB128(reinterpret_cast<const uint8_t *>(Start));
+  return Magic == SPMagic();
+}
+
+/// \brief Load the contents of \p Filename into a memory buffer.
+///
+/// \param Filename Name handed to MemoryBuffer::getFileOrSTDIN().
+///
+/// \returns the buffer, the error produced while opening it, or
+/// sampleprof_error::too_large if its size does not fit in an unsigned value.
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+setupMemoryBuffer(std::string Filename) {
+  auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
+  if (std::error_code EC = FileOrErr.getError())
+    return EC;
+  std::unique_ptr<MemoryBuffer> Contents = std::move(FileOrErr.get());
+
+  // Sanity check the file.
+  const bool FitsInUnsigned =
+      Contents->getBufferSize() <= std::numeric_limits<unsigned>::max();
+  if (!FitsInUnsigned)
+    return sampleprof_error::too_large;
+
+  return std::move(Contents);
+}
+
+/// \brief Create a sample profile reader based on the format of the input file.
+///
+/// A binary reader is used when the file starts with the binary magic number
+/// and a text reader otherwise. The header is read before the reader is
+/// handed back.
+///
+/// \param Filename The file to open.
+///
+/// \param C The LLVM context to use to emit diagnostics.
+///
+/// \returns the new reader, or the error produced while opening the file or
+/// reading its header.
+ErrorOr<std::unique_ptr<SampleProfileReader>>
+SampleProfileReader::create(StringRef Filename, LLVMContext &C) {
+  auto BufferOrError = setupMemoryBuffer(Filename);
+  if (std::error_code EC = BufferOrError.getError())
+    return EC;
+  auto Contents = std::move(BufferOrError.get());
+
+  // The format has to be detected before ownership of the buffer moves into
+  // the reader.
+  const bool IsBinary = SampleProfileReaderBinary::hasFormat(*Contents);
+  std::unique_ptr<SampleProfileReader> Reader;
+  if (IsBinary)
+    Reader.reset(new SampleProfileReaderBinary(std::move(Contents), C));
+  else
+    Reader.reset(new SampleProfileReaderText(std::move(Contents), C));
+
+  if (std::error_code EC = Reader->readHeader())
+    return EC;
+
+  return std::move(Reader);
+}
diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp
new file mode 100644
index 0000000..8525045
--- /dev/null
+++ b/lib/ProfileData/SampleProfWriter.cpp
@@ -0,0 +1,126 @@
+//===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that writes LLVM sample profiles. It
+// supports two file formats: text and binary. The textual representation
+// is useful for debugging and testing purposes. The binary representation
+// is more compact, resulting in smaller file sizes. However, they can
+// both be used interchangeably.
+//
+// See lib/ProfileData/SampleProfReader.cpp for documentation on each of the
+// supported formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/SampleProfWriter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/Regex.h"
+
+using namespace llvm::sampleprof;
+using namespace llvm;
+
+/// \brief Write the samples of one function in the text format.
+///
+/// Nothing is written for an empty profile. Otherwise a
+/// 'name:total:head' line is followed by one 'offset[.discriminator]: count'
+/// line per sampled location, each with its ' callee:count' entries appended.
+///
+/// \param FName Name of the function.
+/// \param S Samples collected for the function.
+///
+/// \returns true.
+bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) {
+  if (S.empty())
+    return true;
+
+  OS << FName << ":" << S.getTotalSamples() << ":" << S.getHeadSamples()
+     << "\n";
+
+  for (const auto &Entry : S.getBodySamples()) {
+    const LineLocation Where = Entry.first;
+    const SampleRecord &Record = Entry.second;
+
+    // The discriminator is only printed when it is not zero.
+    OS << Where.LineOffset;
+    if (Where.Discriminator != 0)
+      OS << "." << Where.Discriminator;
+    OS << ": " << Record.getSamples();
+
+    for (const auto &Target : Record.getCallTargets())
+      OS << " " << Target.first() << ":" << Target.second;
+    OS << "\n";
+  }
+
+  return true;
+}
+
+/// \brief Set up a binary writer for file \p F and emit the file header.
+///
+/// \param F Name passed on to the SampleProfileWriter base class.
+/// \param EC Status set by the base class; the header (magic number followed
+/// by the version) is written only when it signals no error.
+SampleProfileWriterBinary::SampleProfileWriterBinary(StringRef F,
+                                                     std::error_code &EC)
+    : SampleProfileWriter(F, EC, sys::fs::F_None) {
+  if (!EC) {
+    // Write the file header.
+    encodeULEB128(SPMagic(), OS);
+    encodeULEB128(SPVersion(), OS);
+  }
+}
+
+/// \brief Write the samples of one function in the binary format.
+///
+/// Nothing is written for an empty profile. Names are followed by a zero
+/// byte and every number is ULEB128 encoded.
+///
+/// \param FName Name of the function.
+/// \param S Samples collected for the function.
+///
+/// \returns true.
+bool SampleProfileWriterBinary::write(StringRef FName,
+                                      const FunctionSamples &S) {
+  if (S.empty())
+    return true;
+
+  // Function name and its terminator, followed by the totals.
+  OS << FName;
+  encodeULEB128(0, OS);
+  encodeULEB128(S.getTotalSamples(), OS);
+  encodeULEB128(S.getHeadSamples(), OS);
+
+  // Number of body records, then the records themselves.
+  encodeULEB128(S.getBodySamples().size(), OS);
+  for (const auto &Entry : S.getBodySamples()) {
+    const LineLocation Where = Entry.first;
+    const SampleRecord &Record = Entry.second;
+
+    encodeULEB128(Where.LineOffset, OS);
+    encodeULEB128(Where.Discriminator, OS);
+    encodeULEB128(Record.getSamples(), OS);
+
+    // Number of call targets, then a (name, count) pair for each of them.
+    encodeULEB128(Record.getCallTargets().size(), OS);
+    for (const auto &Target : Record.getCallTargets()) {
+      const unsigned Count = Target.second;
+      OS << Target.first();
+      encodeULEB128(0, OS);
+      encodeULEB128(Count, OS);
+    }
+  }
+
+  return true;
+}
+
+/// \brief Create a sample profile writer based on the specified format.
+///
+/// \param Filename The file to create.
+///
+/// \param Format Encoding format for the profile file.
+///
+/// \returns the new writer, the error set while constructing it, or
+/// sampleprof_error::unrecognized_format if \p Format is neither SPF_Binary
+/// nor SPF_Text.
+ErrorOr<std::unique_ptr<SampleProfileWriter>>
+SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
+  std::unique_ptr<SampleProfileWriter> Writer;
+  std::error_code EC;
+
+  switch (Format) {
+  case SPF_Binary:
+    Writer.reset(new SampleProfileWriterBinary(Filename, EC));
+    break;
+  case SPF_Text:
+    Writer.reset(new SampleProfileWriterText(Filename, EC));
+    break;
+  default:
+    EC = sampleprof_error::unrecognized_format;
+    break;
+  }
+
+  if (EC)
+    return EC;
+
+  return std::move(Writer);
+}