16 files changed, 832 insertions, 201 deletions
diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt
index 81e51d1..bfd87ec 100644
--- a/lib/Fuzzer/CMakeLists.txt
+++ b/lib/Fuzzer/CMakeLists.txt
@@ -1,8 +1,10 @@
-# Disable the coverage instrumentation for the fuzzer itself.
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -fsanitize-coverage=0")
-if( LLVM_USE_SANITIZE_COVERAGE  )
+set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS_RELEASE}")
+# Disable the coverage and sanitizer instrumentation for the fuzzer itself.
+set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O2 -fno-sanitize=all")
+if( LLVM_USE_SANITIZE_COVERAGE )
   add_library(LLVMFuzzerNoMain OBJECT
     FuzzerCrossOver.cpp
+    FuzzerDFSan.cpp
     FuzzerDriver.cpp
     FuzzerIO.cpp
     FuzzerLoop.cpp
diff --git a/lib/Fuzzer/FuzzerDFSan.cpp b/lib/Fuzzer/FuzzerDFSan.cpp
new file mode 100644
index 0000000..16f8c0f
--- /dev/null
+++ b/lib/Fuzzer/FuzzerDFSan.cpp
@@ -0,0 +1,275 @@
+//===- FuzzerDFSan.cpp - DFSan-based fuzzer mutator -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// DataFlowSanitizer (DFSan) is a tool for
+// generalised dynamic data flow (taint) analysis:
+// http://clang.llvm.org/docs/DataFlowSanitizer.html .
+//
+// This file implements a mutation algorithm based on taint
+// analysis feedback from DFSan.
+//
+// The approach has some similarity to "Taint-based Directed Whitebox Fuzzing"
+// by Vijay Ganesh & Tim Leek & Martin Rinard:
+// http://dspace.mit.edu/openaccess-disseminate/1721.1/59320,
+// but it uses a full blown LLVM IR taint analysis and separate instrumentation
+// to analyze all of the "attack points" at once.
+//
+// Workflow:
+//   * lib/Fuzzer/Fuzzer*.cpp is compiled w/o any instrumentation.
+//   * The code under test is compiled with DFSan *and* with special extra hooks
+//     that are inserted before dfsan. Currently supported hooks:
+//     - __sanitizer_cov_trace_cmp: inserted before every ICMP instruction,
+//       receives the type, size and arguments of ICMP.
+//   * Every call to HOOK(a,b) is replaced by DFSan with
+//     __dfsw_HOOK(a, b, label(a), label(b)) so that __dfsw_HOOK
+//     gets all the taint labels for the arguments.
+//   * At the Fuzzer startup we assign a unique DFSan label
+//     to every byte of the input string (Fuzzer::CurrentUnit) so that for any
+//     chunk of data we know which input bytes it has derived from.
+//   * The __dfsw_* functions (implemented in this file) record the
+//     parameters (i.e. the application data and the corresponding taint labels)
+//     in a global state.
+//   * Fuzzer::MutateWithDFSan() tries to use the data recorded by __dfsw_*
+//     hooks to guide the fuzzing towards new application states.
+//     For example if 4 bytes of data that derive from input bytes {4,5,6,7}
+//     are compared with a constant 12345 and the comparison always yields
+//     the same result, we try to insert 12345, 12344, 12346 into bytes
+//     {4,5,6,7} of the next fuzzed inputs.
+//
+// This code does not function when DFSan is not linked in.
+// Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer
+// we redeclare the dfsan_* interface functions as weak and check if they
+// are nullptr before calling.
+// If this approach proves to be useful we may add attribute(weak) to the
+// dfsan declarations in dfsan_interface.h
+//
+// This module is in the "proof of concept" stage.
+// It is capable of solving only the simplest puzzles
+// like test/dfsan/DFSanSimpleCmpTest.cpp.
+//===----------------------------------------------------------------------===//
+
+/* Example of manual usage:
+(
+  cd $LLVM/lib/Fuzzer/
+  clang  -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp
+  clang++ -O0 -std=c++11 -fsanitize-coverage=3  \
+    -mllvm -sanitizer-coverage-experimental-trace-compares=1 \
+    -fsanitize=dataflow -fsanitize-blacklist=./dfsan_fuzzer_abi.list  \
+    test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o
+  ./a.out
+)
+*/
+
+#include "FuzzerInternal.h"
+#include <sanitizer/dfsan_interface.h>
+
+#include <cstring>
+#include <iostream>
+#include <unordered_map>
+
+extern "C" {  // Weak redeclarations: null addresses if DFSan is not linked.
+__attribute__((weak))
+dfsan_label dfsan_create_label(const char *desc, void *userdata);
+__attribute__((weak))
+void dfsan_set_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+void dfsan_add_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label);
+}  // extern "C"
+
+namespace {
+
+// ICmp predicate codes, copied from include/llvm/IR/InstrTypes.h.
+// We do not include the LLVM headers here to remain independent.
+// A code outside this list trips the assertion in ComputeCmp's default case.
+enum Predicate {
+  ICMP_EQ = 32,  ///< equal
+  ICMP_NE = 33,  ///< not equal
+  ICMP_UGT = 34, ///< unsigned greater than
+  ICMP_UGE = 35, ///< unsigned greater or equal
+  ICMP_ULT = 36, ///< unsigned less than
+  ICMP_ULE = 37, ///< unsigned less or equal
+  ICMP_SGT = 38, ///< signed greater than
+  ICMP_SGE = 39, ///< signed greater or equal
+  ICMP_SLT = 40, ///< signed less than
+  ICMP_SLE = 41, ///< signed less or equal
+};
+
+template <class U, class S>
+bool ComputeCmp(size_t CmpType, U Arg1, U Arg2) {
+  const S SArg1 = (S)Arg1, SArg2 = (S)Arg2;  // Signed views for ICMP_S*.
+  switch (CmpType) {
+    case ICMP_EQ : return Arg1 == Arg2;
+    case ICMP_NE : return Arg1 != Arg2;
+    case ICMP_UGT: return Arg1 > Arg2;
+    case ICMP_UGE: return Arg1 >= Arg2;
+    case ICMP_ULT: return Arg1 < Arg2;
+    case ICMP_ULE: return Arg1 <= Arg2;
+    case ICMP_SGT: return SArg1 > SArg2;
+    case ICMP_SGE: return SArg1 >= SArg2;
+    case ICMP_SLT: return SArg1 < SArg2;
+    case ICMP_SLE: return SArg1 <= SArg2;
+    default: assert(0 && "unsupported CmpType"); return false;
+  }
+}
+
+static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1,
+                       uint64_t Arg2) {  // CmpSize is the width in bytes.
+  if (CmpSize == 8) return ComputeCmp<uint64_t, int64_t>(CmpType, Arg1, Arg2);
+  if (CmpSize == 4) return ComputeCmp<uint32_t, int32_t>(CmpType, Arg1, Arg2);
+  if (CmpSize == 2) return ComputeCmp<uint16_t, int16_t>(CmpType, Arg1, Arg2);
+  if (CmpSize == 1) return ComputeCmp<uint8_t, int8_t>(CmpType, Arg1, Arg2);
+  assert(0 && "unsupported type size");
+  return true;  // Only reachable when asserts are compiled out.
+}
+
+// As a simplification we use the range of input bytes instead of a set of input
+// bytes.
+struct LabelRange {
+  uint16_t Beg, End;  // Half-open interval [Beg, End); Beg == End means empty.
+
+  LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {}
+  // Smallest range covering both arguments; an empty argument is ignored.
+  static LabelRange Join(LabelRange A, LabelRange B) {
+    if (A.Beg == A.End) return B;
+    if (B.Beg == B.End) return A;
+    return LabelRange(std::min(A.Beg, B.Beg), std::max(A.End, B.End));
+  }
+  // In-place variant: widens *this so that it also covers Other.
+  LabelRange &Join(LabelRange Other) { return *this = Join(*this, Other); }
+  // One-element range [userdata - 1, userdata) for a base label's info.
+  static LabelRange Singleton(const dfsan_label_info *LI) {
+    const uint16_t OnePastIdx = (uint16_t)(uintptr_t)LI->userdata;
+    assert(OnePastIdx > 0);
+    return LabelRange((uint16_t)(OnePastIdx - 1), OnePastIdx);
+  }
+};
+
+std::ostream &operator<<(std::ostream &os, const LabelRange &LR) {
+  return os << '[' << LR.Beg << ',' << LR.End << ')';  // E.g. "[4,8)".
+}
+
+class DFSanState {  // Per-compare-site statistics gathered from the hooks.
+ public:
+   DFSanState(const fuzzer::Fuzzer::FuzzingOptions &Options)
+       : Options(Options) {}
+
+  struct CmpSiteInfo {
+    size_t ResCounters[2] = {0, 0};  // [0]: times false, [1]: times true.
+    size_t CmpSize = 0;  // Operand width in bytes.
+    LabelRange LR;  // Join of the label ranges seen at this site.
+    std::unordered_map<uint64_t, size_t> CountedConstants;  // Untainted args.
+  };
+
+  LabelRange GetLabelRange(dfsan_label L);
+  void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
+                        uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
+                        dfsan_label L2);
+  bool Mutate(fuzzer::Unit *U);
+
+ private:
+  std::unordered_map<uintptr_t, CmpSiteInfo> PcToCmpSiteInfoMap;  // By PC.
+  LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {};  // Memo.
+  const fuzzer::Fuzzer::FuzzingOptions &Options;  // Not owned.
+};
+
+LabelRange DFSanState::GetLabelRange(dfsan_label L) {
+  LabelRange &Cached = LabelRanges[L];  // Memo slot; empty means "unknown".
+  if (L == 0 || Cached.Beg < Cached.End) return Cached;
+  const dfsan_label_info *Info = dfsan_get_label_info(L);
+  Cached = (Info->l1 || Info->l2)
+      ? LabelRange::Join(GetLabelRange(Info->l1), GetLabelRange(Info->l2))
+      : LabelRange::Singleton(Info);
+  return Cached;
+}
+
+void DFSanState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
+                                  uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
+                                  dfsan_label L2) {
+  // Record only sites with exactly one tainted operand: none is not
+  // actionable; both is probably still actionable, but is not handled yet.
+  if ((L1 == 0) == (L2 == 0)) return;
+  const bool Outcome = ComputeCmp(CmpSize, CmpType, Arg1, Arg2);
+  CmpSiteInfo &Site = PcToCmpSiteInfoMap[PC];
+  Site.CmpSize = CmpSize;
+  Site.LR.Join(GetLabelRange(L1)).Join(GetLabelRange(L2));
+  // Exactly one label is zero here, so this counts the untainted operand.
+  Site.CountedConstants[L1 == 0 ? Arg1 : Arg2]++;
+  const size_t HitsBefore = Site.ResCounters[Outcome]++;
+
+  // Log at power-of-two hit counts, while the other outcome is still unseen.
+  const bool AtPow2 = (HitsBefore & (HitsBefore - 1)) == 0;
+  if (Options.Verbosity < 2 || !AtPow2 || Site.ResCounters[!Outcome] != 0)
+    return;
+  std::cerr << "DFSAN:"
+            << " PC " << std::hex << PC << std::dec
+            << " S " << CmpSize
+            << " T " << CmpType
+            << " A1 " << Arg1 << " A2 " << Arg2 << " R " << Outcome
+            << " L" << L1 << GetLabelRange(L1)
+            << " L" << L2 << GetLabelRange(L2)
+            << " LR " << Site.LR
+            << "\n";
+}
+
+bool DFSanState::Mutate(fuzzer::Unit *U) {
+  for (auto &PCToCmp : PcToCmpSiteInfoMap) {
+    auto &CSI = PCToCmp.second;
+    if (CSI.ResCounters[0] * CSI.ResCounters[1] != 0) continue;  // Both seen.
+    if (CSI.ResCounters[0] + CSI.ResCounters[1] < 1000) continue;
+    if (CSI.CountedConstants.size() != 1) continue;  // Need one constant.
+    // Too-short unit, or empty/inverted range (would make the '%' below UB).
+    if (U->size() < CSI.CmpSize || CSI.LR.Beg >= CSI.LR.End) continue;
+    // uint64_t, not uintptr_t: memcpy below reads up to 8 bytes out of C.
+    uint64_t C = CSI.CountedConstants.begin()->first;
+    size_t Idx = CSI.LR.Beg + rand() % (CSI.LR.End - CSI.LR.Beg);
+    if (Idx + CSI.CmpSize > U->size()) continue;
+    C += rand() % 5 - 2;
+    memcpy(U->data() + Idx, &C, CSI.CmpSize);
+    return true;
+  }
+  return false;
+}
+
+static DFSanState *DFSan;
+
+}  // namespace
+
+namespace fuzzer {
+
+bool Fuzzer::MutateWithDFSan(Unit *U) {
+  const bool Usable = &dfsan_create_label && DFSan;  // Linked and enabled.
+  return Usable && DFSan->Mutate(U);
+}
+
+void Fuzzer::InitializeDFSan() {
+  if (!Options.UseDFSan || !&dfsan_create_label) return;
+  DFSan = new DFSanState(Options);
+  CurrentUnit.resize(Options.MaxLen);
+  for (size_t Pos = 1; Pos <= static_cast<size_t>(Options.MaxLen); Pos++) {
+    // Byte Pos-1 gets label Pos, assuming nobody created DFSan labels before.
+    dfsan_label L = dfsan_create_label("input", (void*)Pos);
+    assert(L == Pos);
+    dfsan_set_label(L, &CurrentUnit[Pos - 1], 1);
+  }
+}
+
+}  // namespace fuzzer
+
+extern "C" {
+void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
+                                      uint64_t Arg2, dfsan_label L0,
+                                      dfsan_label L1, dfsan_label L2) {
+  if (!DFSan) return;  // Not initialized yet, or disabled with -dfsan=0.
+  assert(L0 == 0);
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  DFSan->DFSanCmpCallback(PC, /*CmpSize=*/(SizeAndType >> 32) / 8, /*CmpType=*/
+                          SizeAndType & 0xffffffff, Arg1, Arg2, L1, L2);
+}
+}  // extern "C"
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 9ccd744..05a699e 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -18,6 +18,10 @@
 #include <thread>
 #include <atomic>
 #include <mutex>
+#include <string>
+#include <sstream>
+#include <algorithm>
+#include <iterator>
 
 namespace fuzzer {
 
@@ -26,19 +30,26 @@ struct FlagDescription {
   const char *Name;
   const char *Description;
   int   Default;
-  int   *Flag;
+  int   *IntFlag;
+  const char **StrFlag;
 };
 
 struct {
-#define FUZZER_FLAG(Type, Name, Default, Description) Type Name;
+#define FUZZER_FLAG_INT(Name, Default, Description) int Name;
+#define FUZZER_FLAG_STRING(Name, Description) const char *Name;
 #include "FuzzerFlags.def"
-#undef FUZZER_FLAG
+#undef FUZZER_FLAG_INT
+#undef FUZZER_FLAG_STRING
 } Flags;
 
 static FlagDescription FlagDescriptions [] {
-#define FUZZER_FLAG(Type, Name, Default, Description) {#Name, Description, Default, &Flags.Name},
+#define FUZZER_FLAG_INT(Name, Default, Description)                            \
+  { #Name, Description, Default, &Flags.Name, nullptr},
+#define FUZZER_FLAG_STRING(Name, Description)                                  \
+  { #Name, Description, 0, nullptr, &Flags.Name },
 #include "FuzzerFlags.def"
-#undef FUZZER_FLAG
+#undef FUZZER_FLAG_INT
+#undef FUZZER_FLAG_STRING
 };
 
 static const size_t kNumFlags =
@@ -79,11 +90,18 @@ static bool ParseOneFlag(const char *Param) {
     const char *Name = FlagDescriptions[F].Name;
     const char *Str = FlagValue(Param, Name);
     if (Str)  {
-      int Val = std::stol(Str);
-      *FlagDescriptions[F].Flag = Val;
-      if (Flags.verbosity >= 2)
-        std::cerr << "Flag: " << Name << " " << Val << "\n";
-      return true;
+      if (FlagDescriptions[F].IntFlag) {
+        int Val = std::stol(Str);
+        *FlagDescriptions[F].IntFlag = Val;
+        if (Flags.verbosity >= 2)
+          std::cerr << "Flag: " << Name << " " << Val << "\n";
+        return true;
+      } else if (FlagDescriptions[F].StrFlag) {
+        *FlagDescriptions[F].StrFlag = Str;
+        if (Flags.verbosity >= 2)
+          std::cerr << "Flag: " << Name << " " << Str << "\n";
+        return true;
+      }
     }
   }
   PrintHelp();
@@ -92,8 +110,12 @@ static bool ParseOneFlag(const char *Param) {
 
 // We don't use any library to minimize dependencies.
 static void ParseFlags(int argc, char **argv) {
-  for (size_t F = 0; F < kNumFlags; F++)
-    *FlagDescriptions[F].Flag = FlagDescriptions[F].Default;
+  for (size_t F = 0; F < kNumFlags; F++) {
+    if (FlagDescriptions[F].IntFlag)
+      *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default;
+    if (FlagDescriptions[F].StrFlag)
+      *FlagDescriptions[F].StrFlag = nullptr;
+  }
   for (int A = 1; A < argc; A++) {
     if (ParseOneFlag(argv[A])) continue;
     inputs.push_back(argv[A]);
@@ -139,6 +161,26 @@ static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers,
   return HasErrors ? 1 : 0;
 }
 
+std::vector<std::string> ReadTokensFile(const char *TokensFilePath) {
+  std::vector<std::string> Tokens;
+  if (!TokensFilePath) return Tokens;
+  // User tokens are the whitespace-separated words of the file, in order.
+  std::istringstream Words(FileToString(TokensFilePath));
+  for (std::string Word; Words >> Word;)
+    Tokens.push_back(Word);
+  // The whitespace characters themselves always come last.
+  Tokens.insert(Tokens.end(), {" ", "\t", "\n"});
+  return Tokens;
+}
+
+int ApplyTokens(const Fuzzer &F, const char *InputFilePath) {
+  // Each byte of the input file is treated as an index into the token table.
+  Unit Expanded = F.SubstituteTokens(FileToVector(InputFilePath));
+  Expanded.push_back(0);  // NUL-terminate so data() streams as a C string.
+  std::cout << Expanded.data();
+  return 0;
+}
+
 int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
   using namespace fuzzer;
 
@@ -161,8 +203,10 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
   Options.UseCounters = Flags.use_counters;
   Options.UseFullCoverageSet = Flags.use_full_coverage_set;
   Options.UseCoveragePairs = Flags.use_coverage_pairs;
+  Options.UseDFSan = Flags.dfsan;
   Options.PreferSmallDuringInitialShuffle =
       Flags.prefer_small_during_initial_shuffle;
+  Options.Tokens = ReadTokensFile(Flags.tokens);
   if (Flags.runs >= 0)
     Options.MaxNumberOfRuns = Flags.runs;
   if (!inputs.empty())
@@ -181,6 +225,16 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
   if (Flags.timeout > 0)
     SetTimer(Flags.timeout);
 
+  if (Flags.verbosity >= 2) {
+    std::cerr << "Tokens: {";
+    for (auto &T : Options.Tokens)
+      std::cerr << T << ",";
+    std::cerr << "}\n";
+  }
+
+  if (Flags.apply_tokens)
+    return ApplyTokens(F, Flags.apply_tokens);
+
   for (auto &inp : inputs)
     F.ReadDir(inp);
 
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 08176af..dbaf75d 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -6,41 +6,48 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-// Flags. FUZZER_FLAG macro should be defined at the point of inclusion.
-// We are not using any flag parsing library for better portability and
-// independence.
+// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the
+// point of inclusion. We are not using any flag parsing library for better
+// portability and independence.
 //===----------------------------------------------------------------------===//
-FUZZER_FLAG(int, verbosity, 1, "Verbosity level.")
-FUZZER_FLAG(int, seed, 0, "Random seed. If 0, seed is generated.")
-FUZZER_FLAG(int, iterations, -1,
+FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.")
+FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.")
+FUZZER_FLAG_INT(iterations, -1,
             "Number of iterations of the fuzzer internal loop"
             " (-1 for infinite iterations).")
-FUZZER_FLAG(int, runs, -1,
+FUZZER_FLAG_INT(runs, -1,
             "Number of individual test runs (-1 for infinite runs).")
-FUZZER_FLAG(int, max_len, 64, "Maximal length of the test input.")
-FUZZER_FLAG(int, cross_over, 1, "If 1, cross over inputs.")
-FUZZER_FLAG(int, mutate_depth, 5,
+FUZZER_FLAG_INT(max_len, 64, "Maximal length of the test input.")
+FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
+FUZZER_FLAG_INT(mutate_depth, 5,
             "Apply this number of consecutive mutations to each input.")
-FUZZER_FLAG(
-    int, prefer_small_during_initial_shuffle, -1,
+FUZZER_FLAG_INT(
+    prefer_small_during_initial_shuffle, -1,
     "If 1, always prefer smaller inputs during the initial corpus shuffle."
     " If 0, never do that. If -1, do it sometimes.")
-FUZZER_FLAG(int, exit_on_first, 0,
+FUZZER_FLAG_INT(exit_on_first, 0,
             "If 1, exit after the first new interesting input is found.")
-FUZZER_FLAG(int, timeout, -1, "Timeout in seconds (if positive).")
-FUZZER_FLAG(int, help, 0, "Print help.")
-FUZZER_FLAG(
-    int, save_minimized_corpus, 0,
+FUZZER_FLAG_INT(timeout, -1, "Timeout in seconds (if positive).")
+FUZZER_FLAG_INT(help, 0, "Print help.")
+FUZZER_FLAG_INT(
+    save_minimized_corpus, 0,
     "If 1, the minimized corpus is saved into the first input directory")
-FUZZER_FLAG(int, use_counters, 0, "Use coverage counters")
-FUZZER_FLAG(int, use_full_coverage_set, 0,
+FUZZER_FLAG_INT(use_counters, 0, "Use coverage counters")
+FUZZER_FLAG_INT(use_full_coverage_set, 0,
             "Experimental: Maximize the number of different full"
             " coverage sets as opposed to maximizing the total coverage."
             " This is potentially MUCH slower, but may discover more paths.")
-FUZZER_FLAG(int, use_coverage_pairs, 0,
+FUZZER_FLAG_INT(use_coverage_pairs, 0,
             "Experimental: Maximize the number of different coverage pairs.")
-FUZZER_FLAG(int, jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn"
+FUZZER_FLAG_INT(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn"
                           " this number of jobs in separate worker processes"
                           " with stdout/stderr redirected to fuzz-JOB.log.")
-FUZZER_FLAG(int, workers, 0,
+FUZZER_FLAG_INT(workers, 0,
             "Number of simultaneous worker processes to run the jobs.")
+FUZZER_FLAG_INT(dfsan, 1, "Use DFSan for taint-guided mutations. No-op unless "
+                           "the DFSan instrumentation was compiled in.")
+
+FUZZER_FLAG_STRING(tokens, "Use the file with tokens (one token per line) to"
+                           " fuzz a token based input language.")
+FUZZER_FLAG_STRING(apply_tokens, "Read the given input file, substitute bytes "
+                                 "with tokens and write the result to stdout.")
diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp
index 224808c..ef23d42 100644
--- a/lib/Fuzzer/FuzzerIO.cpp
+++ b/lib/Fuzzer/FuzzerIO.cpp
@@ -33,6 +33,12 @@ Unit FileToVector(const std::string &Path) {
               std::istreambuf_iterator<char>());
 }
 
+std::string FileToString(const std::string &Path) {
+  std::ifstream In(Path);
+  std::istreambuf_iterator<char> First(In), Last;  // Last is end-of-stream.
+  return std::string(First, Last);
+}
+
 void CopyFileToErr(const std::string &Path) {
   std::ifstream T(Path);
   std::copy(std::istreambuf_iterator<char>(T), std::istreambuf_iterator<char>(),
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index e4e5eb7..7787109 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -23,7 +23,8 @@ namespace fuzzer {
 typedef std::vector<uint8_t> Unit;
 using namespace std::chrono;
 
-Unit ReadFile(const char *Path);
+std::string FileToString(const std::string &Path);
+Unit FileToVector(const std::string &Path);
 void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V);
 void WriteToFile(const Unit &U, const std::string &Path);
 void CopyFileToErr(const std::string &Path);
@@ -51,17 +52,17 @@ class Fuzzer {
     bool UseCounters = false;
     bool UseFullCoverageSet  = false;
     bool UseCoveragePairs = false;
+    bool UseDFSan = false;
     int PreferSmallDuringInitialShuffle = -1;
     size_t MaxNumberOfRuns = ULONG_MAX;
     std::string OutputCorpus;
+    std::vector<std::string> Tokens;
   };
-  Fuzzer(UserCallback Callback, FuzzingOptions Options)
-      : Callback(Callback), Options(Options) {
-    SetDeathCallback();
-  }
+  Fuzzer(UserCallback Callback, FuzzingOptions Options);
   void AddToCorpus(const Unit &U) { Corpus.push_back(U); }
   size_t Loop(size_t NumIterations);
   void ShuffleAndMinimize();
+  void InitializeDFSan();
   size_t CorpusSize() const { return Corpus.size(); }
   void ReadDir(const std::string &Path) {
     ReadDirToVectorOfUnits(Path.c_str(), &Corpus);
@@ -76,20 +77,28 @@ class Fuzzer {
 
   size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; }
 
-  static void AlarmCallback();
+  static void StaticAlarmCallback();
+
+  Unit SubstituteTokens(const Unit &U) const;
 
  private:
+  void AlarmCallback();
+  void ExecuteCallback(const Unit &U);
   size_t MutateAndTestOne(Unit *U);
   size_t RunOne(const Unit &U);
   size_t RunOneMaximizeTotalCoverage(const Unit &U);
   size_t RunOneMaximizeFullCoverageSet(const Unit &U);
   size_t RunOneMaximizeCoveragePairs(const Unit &U);
   void WriteToOutputCorpus(const Unit &U);
-  static void WriteToCrash(const Unit &U, const char *Prefix);
+  void WriteToCrash(const Unit &U, const char *Prefix);
+  bool MutateWithDFSan(Unit *U);
+  void PrintStats(const char *Where, size_t Cov, const char *End = "\n");
+  void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = "");
 
   void SetDeathCallback();
-  static void DeathCallback();
-  static Unit CurrentUnit;
+  static void StaticDeathCallback();
+  void DeathCallback();
+  Unit CurrentUnit;
 
   size_t TotalNumberOfRuns = 0;
 
@@ -108,7 +117,8 @@ class Fuzzer {
   UserCallback Callback;
   FuzzingOptions Options;
   system_clock::time_point ProcessStartTime = system_clock::now();
-  static system_clock::time_point UnitStartTime;
+  system_clock::time_point UnitStartTime;
+  long TimeOfLongestUnitInSeconds = 0;
 };
 
 };  // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 563fbf4..9dfe30b 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -16,21 +16,49 @@
 
 namespace fuzzer {
 
-// static
-Unit Fuzzer::CurrentUnit;
-system_clock::time_point Fuzzer::UnitStartTime;
+// Only one Fuzzer per process.
+static Fuzzer *F;
+
+Fuzzer::Fuzzer(UserCallback Callback, FuzzingOptions Options)
+    : Callback(Callback), Options(Options) {
+  SetDeathCallback();
+  InitializeDFSan();  // No-op unless DFSan is linked in and enabled.
+  assert(!F);  // Enforces "only one Fuzzer per process".
+  F = this;  // Lets the static death/alarm trampolines reach this instance.
+}
 
 void Fuzzer::SetDeathCallback() {
-  __sanitizer_set_death_callback(DeathCallback);
+  __sanitizer_set_death_callback(StaticDeathCallback);
+}
+
+void Fuzzer::PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter) {
+  // Without a token table the unit is plain bytes: defer to PrintASCII.
+  if (Options.Tokens.empty())
+    return PrintASCII(U, PrintAfter);
+
+  // Otherwise every byte is a token index: print the expanded text instead.
+  Unit Text = SubstituteTokens(U);
+  Text.push_back(0);  // NUL-terminate so data() streams as a C string.
+  std::cerr << Text.data() << PrintAfter;
+}
+
+void Fuzzer::StaticDeathCallback() {  // Static trampoline to the instance.
+  assert(F);  // F is set by the Fuzzer constructor.
+  F->DeathCallback();
+}
 
 void Fuzzer::DeathCallback() {
   std::cerr << "DEATH: " <<  std::endl;
   Print(CurrentUnit, "\n");
-  PrintASCII(CurrentUnit, "\n");
+  PrintUnitInASCIIOrTokens(CurrentUnit, "\n");
   WriteToCrash(CurrentUnit, "crash-");
 }
 
+void Fuzzer::StaticAlarmCallback() {  // Static trampoline to the instance.
+  assert(F);  // F is set by the Fuzzer constructor.
+  F->AlarmCallback();
+}
+
 void Fuzzer::AlarmCallback() {
   size_t Seconds =
       duration_cast<seconds>(system_clock::now() - UnitStartTime).count();
@@ -38,27 +66,40 @@ void Fuzzer::AlarmCallback() {
             << std::endl;
   if (Seconds >= 3) {
     Print(CurrentUnit, "\n");
-    PrintASCII(CurrentUnit, "\n");
+    PrintUnitInASCIIOrTokens(CurrentUnit, "\n");
     WriteToCrash(CurrentUnit, "timeout-");
   }
   exit(1);
 }
 
+void Fuzzer::PrintStats(const char *Where, size_t Cov, const char *End) {
+  if (Options.Verbosity == 0) return;
+  // Average runs per second since start-up; 0 while no full second elapsed.
+  const size_t Elapsed = secondsSinceProcessStartUp();
+  const size_t RunsPerSec = Elapsed == 0 ? 0 : TotalNumberOfRuns / Elapsed;
+  std::ostream &Out = std::cerr;
+  Out << "#" << TotalNumberOfRuns << "\t" << Where;
+  Out << " cov " << Cov;
+  Out << " bits " << TotalBits();
+  Out << " units " << Corpus.size();
+  Out << " exec/s " << RunsPerSec;
+  Out << End;
+}
+
 void Fuzzer::ShuffleAndMinimize() {
+  size_t MaxCov = 0;
   bool PreferSmall =
       (Options.PreferSmallDuringInitialShuffle == 1 ||
        (Options.PreferSmallDuringInitialShuffle == -1 && rand() % 2));
   if (Options.Verbosity)
-    std::cerr << "Shuffle: Size: " << Corpus.size()
-              << " prefer small: " << PreferSmall
-              << "\n";
+    std::cerr << "PreferSmall: " << PreferSmall << "\n";
+  PrintStats("READ  ", 0);
   std::vector<Unit> NewCorpus;
   std::random_shuffle(Corpus.begin(), Corpus.end());
   if (PreferSmall)
     std::stable_sort(
         Corpus.begin(), Corpus.end(),
         [](const Unit &A, const Unit &B) { return A.size() < B.size(); });
-  size_t MaxCov = 0;
   Unit &U = CurrentUnit;
   for (const auto &C : Corpus) {
     for (size_t First = 0; First < 1; First++) {
@@ -77,18 +118,29 @@ void Fuzzer::ShuffleAndMinimize() {
     }
   }
   Corpus = NewCorpus;
-  if (Options.Verbosity)
-    std::cerr << "Shuffle done: " << Corpus.size() << " IC: " << MaxCov << "\n";
+  PrintStats("INITED", MaxCov);
 }
 
 size_t Fuzzer::RunOne(const Unit &U) {
   UnitStartTime = system_clock::now();
   TotalNumberOfRuns++;
+  size_t Res = 0;
   if (Options.UseFullCoverageSet)
-    return RunOneMaximizeFullCoverageSet(U);
-  if (Options.UseCoveragePairs)
-    return RunOneMaximizeCoveragePairs(U);
-  return RunOneMaximizeTotalCoverage(U);
+    Res = RunOneMaximizeFullCoverageSet(U);
+  else if (Options.UseCoveragePairs)
+    Res = RunOneMaximizeCoveragePairs(U);
+  else
+    Res = RunOneMaximizeTotalCoverage(U);
+  auto UnitStopTime = system_clock::now();
+  auto TimeOfUnit =
+      duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
+  if (TimeOfUnit > TimeOfLongestUnitInSeconds) {
+    TimeOfLongestUnitInSeconds = TimeOfUnit;
+    std::cerr << "Longest unit: " << TimeOfLongestUnitInSeconds
+              << " s:\n";
+    Print(U, "\n");
+  }
+  return Res;
 }
 
 static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) {
@@ -99,12 +151,35 @@ static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) {
   return Res;
 }
 
+Unit Fuzzer::SubstituteTokens(const Unit &U) const {
+  Unit Expanded;
+  for (uint8_t TokenIdx : U) {
+    if (TokenIdx >= Options.Tokens.size()) {
+      Expanded.push_back(' ');  // Unknown index: expands to a single space.
+      continue;
+    }
+    const std::string &Token = Options.Tokens[TokenIdx];
+    Expanded.insert(Expanded.end(), Token.begin(), Token.end());
+  }
+  // FIXME: Apply DFSan labels.
+  return Expanded;
+}
+
+void Fuzzer::ExecuteCallback(const Unit &U) {
+  // With a token table the target sees the expanded text, not the indices;
+  // otherwise it is handed U's own buffer (no copy is made).
+  Unit Expanded;
+  const Unit *Input = &U;
+  if (!Options.Tokens.empty()) Input = &(Expanded = SubstituteTokens(U));
+  Callback(Input->data(), Input->size());
+}
+
 // Experimental. Does not yet scale.
 // Fuly reset the current coverage state, run a single unit,
 // collect all coverage pairs and return non-zero if a new pair is observed.
 size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) {
   __sanitizer_reset_coverage();
-  Callback(U.data(), U.size());
+  ExecuteCallback(U);
   uintptr_t *PCs;
   uintptr_t NumPCs = __sanitizer_get_coverage_guards(&PCs);
   bool HasNewPairs = false;
@@ -129,7 +204,7 @@ size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) {
 // e.g. test/FullCoverageSetTest.cpp. FIXME: make it scale.
 size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) {
   __sanitizer_reset_coverage();
-  Callback(U.data(), U.size());
+  ExecuteCallback(U);
   uintptr_t *PCs;
   uintptr_t NumPCs =__sanitizer_get_coverage_guards(&PCs);
   if (FullCoverageSets.insert(HashOfArrayOfPCs(PCs, NumPCs)).second)
@@ -144,21 +219,16 @@ size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) {
     __sanitizer_update_counter_bitset_and_clear_counters(0);
   }
   size_t OldCoverage = __sanitizer_get_total_unique_coverage();
-  Callback(U.data(), U.size());
+  ExecuteCallback(U);
   size_t NewCoverage = __sanitizer_get_total_unique_coverage();
   size_t NumNewBits = 0;
   if (Options.UseCounters)
     NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters(
         CounterBitmap.data());
 
-  if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) {
-    size_t Seconds = secondsSinceProcessStartUp();
-    std::cerr
-        << "#" << TotalNumberOfRuns
-        << "\tcov: " << NewCoverage
-        << "\tbits: " << TotalBits()
-        << "\texec/s: " << (Seconds ? TotalNumberOfRuns / Seconds : 0) << "\n";
-  }
+  if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity)
+    PrintStats("pulse ", NewCoverage);
+
   if (NewCoverage > OldCoverage || NumNewBits)
     return NewCoverage;
   return 0;
@@ -192,22 +262,18 @@ size_t Fuzzer::MutateAndTestOne(Unit *U) {
   for (int i = 0; i < Options.MutateDepth; i++) {
     if (TotalNumberOfRuns >= Options.MaxNumberOfRuns)
       return NewUnits;
+    MutateWithDFSan(U);
     Mutate(U, Options.MaxLen);
     size_t NewCoverage = RunOne(*U);
     if (NewCoverage) {
       Corpus.push_back(*U);
       NewUnits++;
+      PrintStats("NEW   ", NewCoverage, "");
       if (Options.Verbosity) {
-        std::cerr << "#" << TotalNumberOfRuns
-                  << "\tNEW: " << NewCoverage
-                  << " B: " << TotalBits()
-                  << " L: " << U->size()
-                  << " S: " << Corpus.size()
-                  << " I: " << i
-                  << "\t";
+        std::cerr << " L: " << U->size();
         if (U->size() < 30) {
-          PrintASCII(*U);
-          std::cerr << "\t";
+          std::cerr << " ";
+          PrintUnitInASCIIOrTokens(*U, "\t");
           Print(*U);
         }
         std::cerr << "\n";
diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp
index 679f289..3635f39 100644
--- a/lib/Fuzzer/FuzzerUtil.cpp
+++ b/lib/Fuzzer/FuzzerUtil.cpp
@@ -19,15 +19,18 @@
 namespace fuzzer {
 
 void Print(const Unit &v, const char *PrintAfter) {
-  std::cerr << v.size() << ": ";
   for (auto x : v)
-    std::cerr << (unsigned) x << " ";
+    std::cerr << "0x" << std::hex << (unsigned) x << std::dec << ",";
   std::cerr << PrintAfter;
 }
 
 void PrintASCII(const Unit &U, const char *PrintAfter) {
-  for (auto X : U)
-    std::cerr << (char)((isascii(X) && X >= ' ') ? X : '?');
+  for (auto X : U) {
+    if (isprint(X))
+      std::cerr << X;
+    else
+      std::cerr << "\\x" << std::hex << (int)(unsigned)X << std::dec;
+  }
   std::cerr << PrintAfter;
 }
 
@@ -43,7 +46,7 @@ std::string Hash(const Unit &in) {
 }
 
 static void AlarmHandler(int, siginfo_t *, void *) {
-  Fuzzer::AlarmCallback();
+  Fuzzer::StaticAlarmCallback();
 }
 
 void SetTimer(int Seconds) {
diff --git a/lib/Fuzzer/README.txt b/lib/Fuzzer/README.txt
index e4d6b4f..79f49b5 100644
--- a/lib/Fuzzer/README.txt
+++ b/lib/Fuzzer/README.txt
@@ -1,112 +1,2 @@
-===============================
-Fuzzer -- a library for coverage-guided fuzz testing.
-===============================
+Moved to http://llvm.org/docs/LibFuzzer.html
 
-This library is intended primarily for in-process coverage-guided fuzz testing
-(fuzzing) of other libraries. The typical workflow looks like this:
-
-  * Build the Fuzzer library as a static archive (or just a set of .o files).
-    Note that the Fuzzer contains the main() function.
-    Preferably do *not* use sanitizers while building the Fuzzer.
-  * Build the library you are going to test with -fsanitize-coverage=[234]
-    and one of the sanitizers. We recommend to build the library in several
-    different modes (e.g. asan, msan, lsan, ubsan, etc) and even using different
-    optimizations options (e.g. -O0, -O1, -O2) to diversify testing.
-  * Build a test driver using the same options as the library.
-    The test driver is a C/C++ file containing interesting calls to the library
-    inside a single function:
-    extern "C" void TestOneInput(const uint8_t *Data, size_t Size);
-  * Link the Fuzzer, the library and the driver together into an executable
-    using the same sanitizer options as for the library.
-  * Collect the initial corpus of inputs for the
-    fuzzer (a directory with test inputs, one file per input).
-    The better your inputs are the faster you will find something interesting.
-    Also try to keep your inputs small, otherwise the Fuzzer will run too slow.
-  * Run the fuzzer with the test corpus. As new interesting test cases are
-    discovered they will be added to the corpus. If a bug is discovered by
-    the sanitizer (asan, etc) it will be reported as usual and the reproducer
-    will be written to disk.
-    Each Fuzzer process is single-threaded (unless the library starts its own
-    threads). You can run the Fuzzer on the same corpus in multiple processes.
-    in parallel. For run-time options run the Fuzzer binary with '-help=1'.
-
-
-The Fuzzer is similar in concept to AFL (http://lcamtuf.coredump.cx/afl/),
-but uses in-process Fuzzing, which is more fragile, more restrictive, but
-potentially much faster as it has no overhead for process start-up.
-It uses LLVM's "Sanitizer Coverage" instrumentation to get in-process
-coverage-feedback https://code.google.com/p/address-sanitizer/wiki/AsanCoverage
-
-The code resides in the LLVM repository and is (or will be) used by various
-parts of LLVM, but the Fuzzer itself does not (and should not) depend on any
-part of LLVM and can be used for other projects. Ideally, the Fuzzer's code
-should not have any external dependencies. Right now it uses STL, which may need
-to be fixed later. See also F.A.Q. below.
-
-Examples of usage in LLVM:
-  * clang-format-fuzzer. The inputs are random pieces of C++-like text.
-  * Build (make sure to use fresh clang as the host compiler):
-    cmake -GNinja  -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-    -DLLVM_USE_SANITIZER=Address -DLLVM_USE_SANITIZE_COVERAGE=YES \
-    /path/to/llvm -DCMAKE_BUILD_TYPE=Release
-    ninja clang-format-fuzzer
-  * Optionally build other kinds of binaries (asan+Debug, msan, ubsan, etc)
-  * TODO: commit the pre-fuzzed corpus to svn (?).
-  * Run:
-      clang-format-fuzzer CORPUS_DIR
-
-Toy example (see SimpleTest.cpp):
-a simple function that does something interesting if it receives bytes "Hi!".
-  # Build the Fuzzer with asan:
-  % clang++ -std=c++11 -fsanitize=address -fsanitize-coverage=3 -O1 -g \
-     Fuzzer*.cpp test/SimpleTest.cpp
-  # Run the fuzzer with no corpus (assuming on empty input)
-  % ./a.out
-
-===============================================================================
-F.A.Q.
-
-Q. Why Fuzzer does not use any of the LLVM support?
-A. There are two reasons.
-First, we want this library to be used outside of the LLVM w/o users having to
-build the rest of LLVM. This may sound unconvincing for many LLVM folks,
-but in practice the need for building the whole LLVM frightens many potential
-users -- and we want more users to use this code.
-Second, there is a subtle technical reason not to rely on the rest of LLVM, or
-any other large body of code (maybe not even STL). When coverage instrumentation
-is enabled, it will also instrument the LLVM support code which will blow up the
-coverage set of the process (since the fuzzer is in-process). In other words, by
-using more external dependencies we will slow down the fuzzer while the main
-reason for it to exist is extreme speed.
-
-Q. What about Windows then? The Fuzzer contains code that does not build on
-Windows.
-A. The sanitizer coverage support does not work on Windows either as of 01/2015.
-Once it's there, we'll need to re-implement OS-specific parts (I/O, signals).
-
-Q. When this Fuzzer is not a good solution for a problem?
-A.
-  * If the test inputs are validated by the target library and the validator
-    asserts/crashes on invalid inputs, the in-process fuzzer is not applicable
-    (we could use fork() w/o exec, but it comes with extra overhead).
-  * Bugs in the target library may accumulate w/o being detected. E.g. a memory
-    corruption that goes undetected at first and then leads to a crash while
-    testing another input. This is why it is highly recommended to run this
-    in-process fuzzer with all sanitizers to detect most bugs on the spot.
-  * It is harder to protect the in-process fuzzer from excessive memory
-    consumption and infinite loops in the target library (still possible).
-  * The target library should not have significant global state that is not
-    reset between the runs.
-  * Many interesting target libs are not designed in a way that supports
-    the in-process fuzzer interface (e.g. require a file path instead of a
-    byte array).
-  * If a single test run takes a considerable fraction of a second (or
-    more) the speed benefit from the in-process fuzzer is negligible.
-  * If the target library runs persistent threads (that outlive
-    execution of one test) the fuzzing results will be unreliable.
-
-Q. So, what exactly this Fuzzer is good for?
-A. This Fuzzer might be a good choice for testing libraries that have relatively
-small inputs, each input takes < 1ms to run, and the library code is not expected
-to crash on invalid inputs.
-Examples: regular expression matchers, text or binary format parsers.
diff --git a/lib/Fuzzer/cxx_fuzzer_tokens.txt b/lib/Fuzzer/cxx_fuzzer_tokens.txt
new file mode 100644
index 0000000..f3c4f80
--- /dev/null
+++ b/lib/Fuzzer/cxx_fuzzer_tokens.txt
@@ -0,0 +1,218 @@
+#
+##
+`
+~
+!
+@
+$
+%
+^
+&
+*
+(
+)
+_
+-
+_
+=
++
+{
+}
+[
+]
+|
+\
+,
+.
+/
+?
+>
+<
+;
+:
+'
+"
+++
+--
+<<
+>>
++=
+-=
+*=
+/=
+>>=
+<<=
+&=
+|=
+^=
+%=
+!=
+&&
+||
+==
+>=
+<=
+->
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+alignas
+alignof
+and
+and_eq
+asm
+auto
+bitand
+bitor
+bool
+break
+case
+catch
+char
+char16_t
+char32_t
+class
+compl
+concept
+const
+constexpr
+const_cast
+continue
+decltype
+default
+delete
+do
+double
+dynamic_cast
+else
+enum
+explicit
+export
+extern
+false
+float
+for
+friend
+goto
+if
+inline
+int
+long
+mutable
+namespace
+new
+noexcept
+not
+not_eq
+nullptr
+operator
+or
+or_eq
+private
+protected
+public
+register
+reinterpret_cast
+requires
+return
+short
+signed
+sizeof
+static
+static_assert
+static_cast
+struct
+switch
+template
+this
+thread_local
+throw
+true
+try
+typedef
+typeid
+typename
+union
+unsigned
+using
+virtual
+void
+volatile
+wchar_t
+while
+xor
+xor_eq
+if
+elif
+else
+endif
+defined
+ifdef
+ifndef
+define
+undef
+include
+line
+error
+pragma
+override
+final
diff --git a/lib/Fuzzer/dfsan_fuzzer_abi.list b/lib/Fuzzer/dfsan_fuzzer_abi.list
new file mode 100644
index 0000000..7da7522
--- /dev/null
+++ b/lib/Fuzzer/dfsan_fuzzer_abi.list
@@ -0,0 +1,12 @@
+# Replaces __sanitizer_cov_trace_cmp with __dfsw___sanitizer_cov_trace_cmp
+fun:__sanitizer_cov_trace_cmp=custom
+fun:__sanitizer_cov_trace_cmp=uninstrumented
+
+# Ignores coverage callbacks.
+fun:__sanitizer_cov=uninstrumented
+fun:__sanitizer_cov=discard
+fun:__sanitizer_cov_module_init=uninstrumented
+fun:__sanitizer_cov_module_init=discard
+
+# Don't add extra parameters to the Fuzzer callback.
+fun:TestOneInput=uninstrumented
diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt
index 08130c6..fb3bf20 100644
--- a/lib/Fuzzer/test/CMakeLists.txt
+++ b/lib/Fuzzer/test/CMakeLists.txt
@@ -2,10 +2,11 @@
 # basic blocks and we'll fail to discover the targets.
 # Also enable the coverage instrumentation back (it is disabled
 # for the Fuzzer lib)
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0 -fsanitize-coverage=4")
+set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O0 -fsanitize-coverage=4")
 
 set(Tests
   CounterTest
+  CxxTokensTest
   FourIndependentBranchesTest
   FullCoverageSetTest
   InfiniteTest
@@ -14,11 +15,14 @@ set(Tests
   TimeoutTest
   )
 
+set(DFSanTests
+  DFSanSimpleCmpTest
+  )
+
 set(TestBinaries)
 
 foreach(Test ${Tests})
   add_executable(LLVMFuzzer-${Test}
-    EXCLUDE_FROM_ALL
     ${Test}.cpp
     )
   target_link_libraries(LLVMFuzzer-${Test}
@@ -52,6 +56,13 @@ target_link_libraries(LLVMFuzzer-Unittest
 
 set(TestBinaries ${TestBinaries} LLVMFuzzer-Unittest)
 
+add_subdirectory(dfsan)
+
+foreach(Test ${DFSanTests})
+  set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test})
+endforeach()
+
+
 set_target_properties(${TestBinaries}
   PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
   )
diff --git a/lib/Fuzzer/test/CxxTokensTest.cpp b/lib/Fuzzer/test/CxxTokensTest.cpp
new file mode 100644
index 0000000..1addccb
--- /dev/null
+++ b/lib/Fuzzer/test/CxxTokensTest.cpp
@@ -0,0 +1,24 @@
+// Simple test for a fuzzer. The fuzzer must find a sequence of C++ tokens.
+#include <cstdint>
+#include <cstdlib>
+#include <cstddef>
+#include <cstring>
+#include <iostream>
+
+static void Found() {
+  std::cout << "Found the target, exiting\n";
+  exit(1);
+}
+
+extern "C" void TestOneInput(const uint8_t *Data, size_t Size) {
+  // Target: any input whose first 24 bytes are "thread_local unsigned A;".
+  if (Size < 24) return;
+  if (0 != memcmp(&Data[0], "thread_local", 12)) return;
+  if (Data[12] != ' ') return;
+  if (0 != memcmp(&Data[13], "unsigned", 8)) return;
+  if (Data[21] != ' ') return;
+  if (Data[22] != 'A') return;
+  if (Data[23] != ';') return;
+  Found();
+}
+
diff --git a/lib/Fuzzer/test/dfsan/CMakeLists.txt b/lib/Fuzzer/test/dfsan/CMakeLists.txt
new file mode 100644
index 0000000..b5b874f
--- /dev/null
+++ b/lib/Fuzzer/test/dfsan/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Builds the DFSan-based fuzzer tests listed in DFSanTests.
+# They are compiled with DataFlowSanitizer plus the trace-compares coverage hooks.
+
+# ABI list handed to the compiler through -fsanitize-blacklist.
+set(DFSAN_FUZZER_ABI_LIST "${CMAKE_CURRENT_SOURCE_DIR}/../../dfsan_fuzzer_abi.list")
+
+# Release flags for this directory: drop every other sanitizer, enable DFSan.
+set(CMAKE_CXX_FLAGS_RELEASE
+  "${LIBFUZZER_FLAGS_BASE} -O0 -fno-sanitize=all -fsanitize=dataflow -mllvm -sanitizer-coverage-experimental-trace-compares=1 -fsanitize-blacklist=${DFSAN_FUZZER_ABI_LIST}")
+
+foreach(Test ${DFSanTests})
+  # The ABI list is a compiler input, so rebuild the object when it changes.
+  set_source_files_properties(${Test}.cpp PROPERTIES OBJECT_DEPENDS ${DFSAN_FUZZER_ABI_LIST})
+  add_executable(LLVMFuzzer-${Test} ${Test}.cpp)
+  target_link_libraries(LLVMFuzzer-${Test} LLVMFuzzer)
+endforeach()
+
diff --git a/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp
new file mode 100644
index 0000000..1162092
--- /dev/null
+++ b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp
@@ -0,0 +1,30 @@
+// Simple test for a fuzzer. The fuzzer must find several narrow ranges.
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+
+extern "C" void TestOneInput(const uint8_t *Data, size_t Size) {
+  // Fuzz target: reports on stderr and exits with status 1 once four integers
+  // decoded from the input all fall inside their narrow target ranges.
+  if (Size < 14) return;  // Every read below stays in bounds for Size >= 14.
+  uint64_t x = 0;
+  int64_t  y = 0;
+  int z = 0;
+  unsigned short a = 0;
+  memcpy(&x, Data, 8);                         // first 8 bytes
+  memcpy(&y, Data + Size - 8, 8);              // last 8 bytes
+  memcpy(&z, Data + Size / 2, sizeof(z));      // sizeof(z) bytes at the midpoint
+  memcpy(&a, Data + Size / 2 + 4, sizeof(a));  // sizeof(a) bytes, 4 past midpoint
+
+  if (x > 1234567890 && x < 1234567895 &&
+      y >= 987654321 && y <= 987654325 &&
+      z < -10000 && z >= -10005 && z != -10003 &&
+      a == 4242) {
+    // %zu for size_t; the fixed-width values are cast to match %llu / %lld.
+    fprintf(stderr,
+            "Found the target: size %zu (%llu, %lld, %d, %d), exiting.\n",
+            Size, (unsigned long long)x, (long long)y, z, a);
+    exit(1);
+  }
+}
diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test
index 45691f5..2a0e95f 100644
--- a/lib/Fuzzer/test/fuzzer.test
+++ b/lib/Fuzzer/test/fuzzer.test
@@ -20,3 +20,9 @@ FourIndependentBranchesTest: BINGO
 
 RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=CounterTest
 CounterTest: BINGO
+
+RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=DFSanSimpleCmpTest
+DFSanSimpleCmpTest: Found the target:
+
+RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s --check-prefix=CxxTokensTest
+CxxTokensTest: Found the target, exiting