aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Fuzzer
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Fuzzer')
-rw-r--r--lib/Fuzzer/CMakeLists.txt8
-rw-r--r--lib/Fuzzer/FuzzerDFSan.cpp275
-rw-r--r--lib/Fuzzer/FuzzerDriver.cpp78
-rw-r--r--lib/Fuzzer/FuzzerFlags.def51
-rw-r--r--lib/Fuzzer/FuzzerIO.cpp6
-rw-r--r--lib/Fuzzer/FuzzerInternal.h30
-rw-r--r--lib/Fuzzer/FuzzerLoop.cpp138
-rw-r--r--lib/Fuzzer/FuzzerUtil.cpp13
-rw-r--r--lib/Fuzzer/README.txt112
-rw-r--r--lib/Fuzzer/cxx_fuzzer_tokens.txt218
-rw-r--r--lib/Fuzzer/dfsan_fuzzer_abi.list12
-rw-r--r--lib/Fuzzer/test/CMakeLists.txt15
-rw-r--r--lib/Fuzzer/test/CxxTokensTest.cpp24
-rw-r--r--lib/Fuzzer/test/dfsan/CMakeLists.txt17
-rw-r--r--lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp30
-rw-r--r--lib/Fuzzer/test/fuzzer.test6
16 files changed, 832 insertions, 201 deletions
diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt
index 81e51d1..bfd87ec 100644
--- a/lib/Fuzzer/CMakeLists.txt
+++ b/lib/Fuzzer/CMakeLists.txt
@@ -1,8 +1,10 @@
-# Disable the coverage instrumentation for the fuzzer itself.
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -fsanitize-coverage=0")
-if( LLVM_USE_SANITIZE_COVERAGE )
+set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS_RELEASE}")
+# Disable the coverage and sanitizer instrumentation for the fuzzer itself.
+set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O2 -fno-sanitize=all")
+if( LLVM_USE_SANITIZE_COVERAGE )
add_library(LLVMFuzzerNoMain OBJECT
FuzzerCrossOver.cpp
+ FuzzerDFSan.cpp
FuzzerDriver.cpp
FuzzerIO.cpp
FuzzerLoop.cpp
diff --git a/lib/Fuzzer/FuzzerDFSan.cpp b/lib/Fuzzer/FuzzerDFSan.cpp
new file mode 100644
index 0000000..16f8c0f
--- /dev/null
+++ b/lib/Fuzzer/FuzzerDFSan.cpp
@@ -0,0 +1,275 @@
+//===- FuzzerDFSan.cpp - DFSan-based fuzzer mutator -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// DataFlowSanitizer (DFSan) is a tool for
+// generalised dynamic data flow (taint) analysis:
+// http://clang.llvm.org/docs/DataFlowSanitizer.html .
+//
+// This file implements a mutation algorithm based on taint
+// analysis feedback from DFSan.
+//
+// The approach has some similarity to "Taint-based Directed Whitebox Fuzzing"
+// by Vijay Ganesh & Tim Leek & Martin Rinard:
+// http://dspace.mit.edu/openaccess-disseminate/1721.1/59320,
+// but it uses a full blown LLVM IR taint analysis and separate instrumentation
+// to analyze all of the "attack points" at once.
+//
+// Workflow:
+// * lib/Fuzzer/Fuzzer*.cpp is compiled w/o any instrumentation.
+// * The code under test is compiled with DFSan *and* with special extra hooks
+// that are inserted before dfsan. Currently supported hooks:
+// - __sanitizer_cov_trace_cmp: inserted before every ICMP instruction,
+// receives the type, size and arguments of ICMP.
+// * Every call to HOOK(a,b) is replaced by DFSan with
+// __dfsw_HOOK(a, b, label(a), label(b)) so that __dfsw_HOOK
+// gets all the taint labels for the arguments.
+// * At the Fuzzer startup we assign a unique DFSan label
+// to every byte of the input string (Fuzzer::CurrentUnit) so that for any
+// chunk of data we know which input bytes it has derived from.
+// * The __dfsw_* functions (implemented in this file) record the
+// parameters (i.e. the application data and the corresponding taint labels)
+// in a global state.
+// * Fuzzer::MutateWithDFSan() tries to use the data recorded by __dfsw_*
+// hooks to guide the fuzzing towards new application states.
+// For example if 4 bytes of data that derive from input bytes {4,5,6,7}
+// are compared with a constant 12345 and the comparison always yields
+// the same result, we try to insert 12345, 12344, 12346 into bytes
+// {4,5,6,7} of the next fuzzed inputs.
+//
+// This code does not function when DFSan is not linked in.
+// Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer
+// we redeclare the dfsan_* interface functions as weak and check if they
+// are nullptr before calling.
+// If this approach proves to be useful we may add attribute(weak) to the
+// dfsan declarations in dfsan_interface.h
+//
+// This module is in the "proof of concept" stage.
+// It is capable of solving only the simplest puzzles
+// like test/dfsan/DFSanSimpleCmpTest.cpp.
+//===----------------------------------------------------------------------===//
+
+/* Example of manual usage:
+(
+ cd $LLVM/lib/Fuzzer/
+ clang -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp
+ clang++ -O0 -std=c++11 -fsanitize-coverage=3 \
+ -mllvm -sanitizer-coverage-experimental-trace-compares=1 \
+ -fsanitize=dataflow -fsanitize-blacklist=./dfsan_fuzzer_abi.list \
+ test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o
+ ./a.out
+)
+*/
+
+#include "FuzzerInternal.h"
+#include <sanitizer/dfsan_interface.h>
+
+#include <cstring>
+#include <iostream>
+#include <unordered_map>
+
+// Weak re-declarations of the DFSan interface functions used in this file.
+// Being weak, they let this file link even when the DFSan runtime is absent;
+// code in this file tests the address of dfsan_create_label against null
+// before relying on DFSan.
+extern "C" {
+__attribute__((weak))
+dfsan_label dfsan_create_label(const char *desc, void *userdata);
+__attribute__((weak))
+void dfsan_set_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+void dfsan_add_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label);
+} // extern "C"
+
+namespace {
+
+// These values are copied from include/llvm/IR/InstrTypes.h.
+// We do not include the LLVM headers here to remain independent.
+// If these values ever change, an assertion in ComputeCmp will fail.
+// ComputeCmp switches on exactly these ten codes; any other CmpType value
+// reaches its "unsupported CmpType" assertion.
+enum Predicate {
+ ICMP_EQ = 32, ///< equal
+ ICMP_NE = 33, ///< not equal
+ ICMP_UGT = 34, ///< unsigned greater than
+ ICMP_UGE = 35, ///< unsigned greater or equal
+ ICMP_ULT = 36, ///< unsigned less than
+ ICMP_ULE = 37, ///< unsigned less or equal
+ ICMP_SGT = 38, ///< signed greater than
+ ICMP_SGE = 39, ///< signed greater or equal
+ ICMP_SLT = 40, ///< signed less than
+ ICMP_SLE = 41, ///< signed less or equal
+};
+
+// Evaluates the integer comparison selected by CmpType on Arg1 and Arg2.
+// U is the unsigned operand type; S is the signed type the operands are cast
+// to for the signed predicates. A CmpType outside the Predicate enum trips
+// the assertion and, when assertions are compiled out, yields false.
+template <class U, class S>
+bool ComputeCmp(size_t CmpType, U Arg1, U Arg2) {
+  const S Signed1 = (S)Arg1;
+  const S Signed2 = (S)Arg2;
+  bool Result = false;
+  switch (CmpType) {
+    case ICMP_EQ:  Result = (Arg1 == Arg2); break;
+    case ICMP_NE:  Result = (Arg1 != Arg2); break;
+    case ICMP_UGT: Result = (Arg1 > Arg2); break;
+    case ICMP_UGE: Result = (Arg1 >= Arg2); break;
+    case ICMP_ULT: Result = (Arg1 < Arg2); break;
+    case ICMP_ULE: Result = (Arg1 <= Arg2); break;
+    case ICMP_SGT: Result = (Signed1 > Signed2); break;
+    case ICMP_SGE: Result = (Signed1 >= Signed2); break;
+    case ICMP_SLT: Result = (Signed1 < Signed2); break;
+    case ICMP_SLE: Result = (Signed1 <= Signed2); break;
+    default: assert(0 && "unsupported CmpType");
+  }
+  return Result;
+}
+
+// Picks the ComputeCmp instantiation that matches the operand width CmpSize
+// (in bytes). Widths other than 1, 2, 4 and 8 trip the assertion; when
+// assertions are compiled out such a width yields true.
+static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1,
+                       uint64_t Arg2) {
+  switch (CmpSize) {
+    case 8: return ComputeCmp<uint64_t, int64_t>(CmpType, Arg1, Arg2);
+    case 4: return ComputeCmp<uint32_t, int32_t>(CmpType, Arg1, Arg2);
+    case 2: return ComputeCmp<uint16_t, int16_t>(CmpType, Arg1, Arg2);
+    case 1: return ComputeCmp<uint8_t, int8_t>(CmpType, Arg1, Arg2);
+    default: break;
+  }
+  assert(0 && "unsupported type size");
+  return true;
+}
+
+// As a simplification we use the range of input bytes instead of a set of input
+// bytes.
+// Half-open range [Beg, End) of input byte indices; Beg == End denotes the
+// empty range.
+struct LabelRange {
+  uint16_t Beg, End;
+
+  LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {}
+
+  // Smallest range covering both arguments. An empty argument contributes
+  // nothing, so the other argument is returned unchanged.
+  static LabelRange Join(LabelRange LR1, LabelRange LR2) {
+    const bool FirstIsEmpty = (LR1.Beg == LR1.End);
+    if (FirstIsEmpty) return LR2;
+    const bool SecondIsEmpty = (LR2.Beg == LR2.End);
+    if (SecondIsEmpty) return LR1;
+    const uint16_t Lo = std::min(LR1.Beg, LR2.Beg);
+    const uint16_t Hi = std::max(LR1.End, LR2.End);
+    return LabelRange(Lo, Hi);
+  }
+
+  // Widens *this so that it also covers LR; returns *this to allow chaining.
+  LabelRange &Join(LabelRange LR) {
+    *this = Join(*this, LR);
+    return *this;
+  }
+
+  // One-byte range for a base label. LI->userdata holds the byte index plus
+  // one, so the range is [Idx - 1, Idx).
+  static LabelRange Singleton(const dfsan_label_info *LI) {
+    const uint16_t Idx = (uint16_t)(uintptr_t)LI->userdata;
+    assert(Idx > 0);
+    return LabelRange((uint16_t)(Idx - 1), Idx);
+  }
+};
+
+// Streams LR in the form "[Beg,End)".
+std::ostream &operator<<(std::ostream &os, const LabelRange &LR) {
+  os << "[" << LR.Beg << "," << LR.End << ")";
+  return os;
+}
+
+// State accumulated from the DFSan comparison hook: per-call-site statistics
+// about observed comparisons plus a per-label cache of input byte ranges.
+// Mutate() consumes that state to rewrite bytes of a fuzzing unit.
+class DFSanState {
+ public:
+ // Keeps a reference to Options (not a copy); only Verbosity is read here.
+ DFSanState(const fuzzer::Fuzzer::FuzzingOptions &Options)
+ : Options(Options) {}
+
+ // What has been observed at one comparison call site.
+ struct CmpSiteInfo {
+ // Number of times the comparison evaluated to false ([0]) / true ([1]).
+ size_t ResCounters[2] = {0, 0};
+ // Operand width in bytes from the most recent callback for this site.
+ size_t CmpSize = 0;
+ // Join of the input byte ranges of all labels seen at this site.
+ LabelRange LR;
+ // Values of the untainted operand, each with its occurrence count.
+ std::unordered_map<uint64_t, size_t> CountedConstants;
+ };
+
+ // Returns the (cached) input byte range that label L derives from.
+ LabelRange GetLabelRange(dfsan_label L);
+ // Records one executed comparison; see the definition for the filtering.
+ void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
+ uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
+ dfsan_label L2);
+ // Tries to mutate *U using the recorded data; returns true if it did.
+ bool Mutate(fuzzer::Unit *U);
+
+ private:
+ // Keyed by the call-site PC passed to DFSanCmpCallback.
+ std::unordered_map<uintptr_t, CmpSiteInfo> PcToCmpSiteInfoMap;
+ // Cache for GetLabelRange: one slot per representable dfsan_label value.
+ LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {};
+ const fuzzer::Fuzzer::FuzzingOptions &Options;
+};
+
+// Returns the range of input bytes that label L derives from, memoizing the
+// answer in LabelRanges. Label 0 always maps to the empty range.
+LabelRange DFSanState::GetLabelRange(dfsan_label L) {
+  LabelRange &Cached = LabelRanges[L];
+  const bool AlreadyComputed = Cached.Beg < Cached.End;
+  if (L == 0 || AlreadyComputed)
+    return Cached;
+  const dfsan_label_info *Info = dfsan_get_label_info(L);
+  const bool IsUnion = Info->l1 || Info->l2;
+  if (!IsUnion) {
+    // A base label stands for exactly one input byte.
+    Cached = LabelRange::Singleton(Info);
+    return Cached;
+  }
+  // A union label covers whatever its two constituent labels cover.
+  Cached = LabelRange::Join(GetLabelRange(Info->l1), GetLabelRange(Info->l2));
+  return Cached;
+}
+
+// Records one executed comparison at call site PC. Only comparisons with
+// exactly one tainted operand (non-zero label) are tracked. For those, the
+// site remembers the operand width, the joined input byte range of the labels,
+// a count per value of the untainted operand, and how often the result was
+// false or true.
+void DFSanState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
+                                  uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
+                                  dfsan_label L2) {
+  const bool Tainted1 = (L1 != 0);
+  const bool Tainted2 = (L2 != 0);
+  if (!Tainted1 && !Tainted2)
+    return;  // Not actionable.
+  if (Tainted1 && Tainted2)
+    return;  // Probably still actionable, but not handled.
+
+  const bool Res = ComputeCmp(CmpSize, CmpType, Arg1, Arg2);
+  CmpSiteInfo &Site = PcToCmpSiteInfoMap[PC];
+  Site.CmpSize = CmpSize;
+  Site.LR.Join(GetLabelRange(L1));
+  Site.LR.Join(GetLabelRange(L2));
+  if (!Tainted1) Site.CountedConstants[Arg1]++;
+  if (!Tainted2) Site.CountedConstants[Arg2]++;
+  const size_t PriorCount = Site.ResCounters[Res]++;
+
+  // Log only at verbosity >= 2, only when the prior count is zero or a power
+  // of two, and only while the opposite result has never been seen here.
+  const bool ZeroOrPowerOfTwo = (PriorCount & (PriorCount - 1)) == 0;
+  if (Options.Verbosity < 2 || !ZeroOrPowerOfTwo ||
+      Site.ResCounters[!Res] != 0)
+    return;
+
+  std::cerr << "DFSAN:"
+            << " PC " << std::hex << PC << std::dec
+            << " S " << CmpSize
+            << " T " << CmpType
+            << " A1 " << Arg1 << " A2 " << Arg2 << " R " << Res
+            << " L" << L1 << GetLabelRange(L1)
+            << " L" << L2 << GetLabelRange(L2)
+            << " LR " << Site.LR
+            << "\n";
+}
+
+// Tries to overwrite CmpSize bytes of *U with a value close to a constant that
+// input-derived data was compared against.
+//
+// A comparison site qualifies when it has produced only one outcome so far,
+// has been observed at least 1000 times, and has seen exactly one distinct
+// untainted operand value. For the first qualifying site, a random offset
+// inside the site's input byte range is chosen and the constant, adjusted by
+// a random delta in [-2, 2], is copied there.
+//
+// Returns true if *U was modified, false if no site was applicable.
+bool DFSanState::Mutate(fuzzer::Unit *U) {
+  for (auto &PCToCmp : PcToCmpSiteInfoMap) {
+    auto &CSI = PCToCmp.second;
+    // Both outcomes already seen: nothing to force at this site.
+    if (CSI.ResCounters[0] * CSI.ResCounters[1] != 0) continue;
+    // Too few observations to call the outcome stuck.
+    if (CSI.ResCounters[0] + CSI.ResCounters[1] < 1000) continue;
+    if (CSI.CountedConstants.size() != 1) continue;
+    // Hold the constant at full width. A uintptr_t would truncate the 64-bit
+    // key on 32-bit targets and let the memcpy below read past the variable
+    // when CmpSize is 8.
+    uint64_t C = CSI.CountedConstants.begin()->first;
+    if (CSI.CmpSize > sizeof(C)) continue;
+    // An empty or inverted range would make the modulo below divide by zero
+    // or pick an arbitrary offset.
+    if (CSI.LR.End <= CSI.LR.Beg) continue;
+    if (U->size() < CSI.CmpSize) continue;
+    size_t RangeSize = CSI.LR.End - CSI.LR.Beg;
+    size_t Idx = CSI.LR.Beg + rand() % RangeSize;
+    if (Idx + CSI.CmpSize > U->size()) continue;
+    // Unsigned wrap-around is intended: the delta is one of -2..2.
+    C += rand() % 5 - 2;
+    // NOTE(review): copying the first CmpSize bytes of C yields the low-order
+    // bytes only on little-endian hosts -- confirm if other hosts matter.
+    memcpy(U->data() + Idx, &C, CSI.CmpSize);
+    return true;
+  }
+  return false;
+}
+
+static DFSanState *DFSan;
+
+} // namespace
+
+namespace fuzzer {
+
+// Applies a DFSan-guided mutation to *U and reports whether *U was changed.
+// Returns false without touching *U when the DFSan runtime is not linked in
+// or no DFSanState has been created.
+bool Fuzzer::MutateWithDFSan(Unit *U) {
+  const bool Available = &dfsan_create_label && DFSan;
+  return Available ? DFSan->Mutate(U) : false;
+}
+
+// Creates the DFSanState and attaches a distinct DFSan label to each of the
+// Options.MaxLen bytes of CurrentUnit. Does nothing when the DFSan runtime is
+// not linked in or Options.UseDFSan is off.
+void Fuzzer::InitializeDFSan() {
+  const bool Enabled = &dfsan_create_label && Options.UseDFSan;
+  if (!Enabled) return;
+  DFSan = new DFSanState(Options);
+  CurrentUnit.resize(Options.MaxLen);
+  const size_t NumBytes = static_cast<size_t>(Options.MaxLen);
+  for (size_t Pos = 0; Pos < NumBytes; Pos++) {
+    // The label's userdata carries the byte index plus one.
+    dfsan_label L = dfsan_create_label("input", (void*)(Pos + 1));
+    // We assume that no one else has called dfsan_create_label before.
+    assert(L == Pos + 1);
+    dfsan_set_label(L, &CurrentUnit[Pos], 1);
+  }
+}
+
+} // namespace fuzzer
+
+extern "C" {
+// DFSan wrapper for the __sanitizer_cov_trace_cmp hook: receives the hook's
+// three arguments followed by one taint label per argument.
+//
+// SizeAndType packs the operand width in bits into its upper 32 bits and the
+// comparison predicate into its lower 32 bits. The call site is identified by
+// this function's return address.
+void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
+                                      uint64_t Arg2, dfsan_label L0,
+                                      dfsan_label L1, dfsan_label L2) {
+  assert(L0 == 0);
+  // Instrumented code calls this hook unconditionally, but the DFSanState is
+  // only created by Fuzzer::InitializeDFSan() when Options.UseDFSan is set.
+  // Without this check a run with DFSan disabled dereferences a null pointer.
+  if (!DFSan) return;
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  uint64_t CmpSize = (SizeAndType >> 32) / 8;
+  uint64_t Type = (SizeAndType << 32) >> 32;
+  DFSan->DFSanCmpCallback(PC, CmpSize, Type, Arg1, Arg2, L1, L2);
+}
+} // extern "C"
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 9ccd744..05a699e 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -18,6 +18,10 @@
#include <thread>
#include <atomic>
#include <mutex>
+#include <string>
+#include <sstream>
+#include <algorithm>
+#include <iterator>
namespace fuzzer {
@@ -26,19 +30,26 @@ struct FlagDescription {
const char *Name;
const char *Description;
int Default;
- int *Flag;
+ int *IntFlag;
+ const char **StrFlag;
};
struct {
-#define FUZZER_FLAG(Type, Name, Default, Description) Type Name;
+#define FUZZER_FLAG_INT(Name, Default, Description) int Name;
+#define FUZZER_FLAG_STRING(Name, Description) const char *Name;
#include "FuzzerFlags.def"
-#undef FUZZER_FLAG
+#undef FUZZER_FLAG_INT
+#undef FUZZER_FLAG_STRING
} Flags;
static FlagDescription FlagDescriptions [] {
-#define FUZZER_FLAG(Type, Name, Default, Description) {#Name, Description, Default, &Flags.Name},
+#define FUZZER_FLAG_INT(Name, Default, Description) \
+ { #Name, Description, Default, &Flags.Name, nullptr},
+#define FUZZER_FLAG_STRING(Name, Description) \
+ { #Name, Description, 0, nullptr, &Flags.Name },
#include "FuzzerFlags.def"
-#undef FUZZER_FLAG
+#undef FUZZER_FLAG_INT
+#undef FUZZER_FLAG_STRING
};
static const size_t kNumFlags =
@@ -79,11 +90,18 @@ static bool ParseOneFlag(const char *Param) {
const char *Name = FlagDescriptions[F].Name;
const char *Str = FlagValue(Param, Name);
if (Str) {
- int Val = std::stol(Str);
- *FlagDescriptions[F].Flag = Val;
- if (Flags.verbosity >= 2)
- std::cerr << "Flag: " << Name << " " << Val << "\n";
- return true;
+ if (FlagDescriptions[F].IntFlag) {
+ int Val = std::stol(Str);
+ *FlagDescriptions[F].IntFlag = Val;
+ if (Flags.verbosity >= 2)
+ std::cerr << "Flag: " << Name << " " << Val << "\n";
+ return true;
+ } else if (FlagDescriptions[F].StrFlag) {
+ *FlagDescriptions[F].StrFlag = Str;
+ if (Flags.verbosity >= 2)
+ std::cerr << "Flag: " << Name << " " << Str << "\n";
+ return true;
+ }
}
}
PrintHelp();
@@ -92,8 +110,12 @@ static bool ParseOneFlag(const char *Param) {
// We don't use any library to minimize dependencies.
static void ParseFlags(int argc, char **argv) {
- for (size_t F = 0; F < kNumFlags; F++)
- *FlagDescriptions[F].Flag = FlagDescriptions[F].Default;
+ for (size_t F = 0; F < kNumFlags; F++) {
+ if (FlagDescriptions[F].IntFlag)
+ *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default;
+ if (FlagDescriptions[F].StrFlag)
+ *FlagDescriptions[F].StrFlag = nullptr;
+ }
for (int A = 1; A < argc; A++) {
if (ParseOneFlag(argv[A])) continue;
inputs.push_back(argv[A]);
@@ -139,6 +161,26 @@ static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers,
return HasErrors ? 1 : 0;
}
+// Reads the whitespace-separated tokens of the file at TokensFilePath and
+// appends the three tokens " ", "\t" and "\n" to them. Returns an empty vector
+// when TokensFilePath is null.
+std::vector<std::string> ReadTokensFile(const char *TokensFilePath) {
+  std::vector<std::string> Tokens;
+  if (!TokensFilePath) return Tokens;
+  std::istringstream Stream(FileToString(TokensFilePath));
+  std::string Token;
+  while (Stream >> Token)
+    Tokens.push_back(Token);
+  Tokens.push_back(" ");
+  Tokens.push_back("\t");
+  Tokens.push_back("\n");
+  return Tokens;
+}
+
+// Loads the file at InputFilePath, expands it with F.SubstituteTokens and
+// writes the expansion to stdout as a NUL-terminated string. Always returns 0.
+int ApplyTokens(const Fuzzer &F, const char *InputFilePath) {
+  Unit Expanded = F.SubstituteTokens(FileToVector(InputFilePath));
+  Expanded.push_back(0);
+  std::cout << Expanded.data();
+  return 0;
+}
+
int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
using namespace fuzzer;
@@ -161,8 +203,10 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
Options.UseCounters = Flags.use_counters;
Options.UseFullCoverageSet = Flags.use_full_coverage_set;
Options.UseCoveragePairs = Flags.use_coverage_pairs;
+ Options.UseDFSan = Flags.dfsan;
Options.PreferSmallDuringInitialShuffle =
Flags.prefer_small_during_initial_shuffle;
+ Options.Tokens = ReadTokensFile(Flags.tokens);
if (Flags.runs >= 0)
Options.MaxNumberOfRuns = Flags.runs;
if (!inputs.empty())
@@ -181,6 +225,16 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
if (Flags.timeout > 0)
SetTimer(Flags.timeout);
+ if (Flags.verbosity >= 2) {
+ std::cerr << "Tokens: {";
+ for (auto &T : Options.Tokens)
+ std::cerr << T << ",";
+ std::cerr << "}\n";
+ }
+
+ if (Flags.apply_tokens)
+ return ApplyTokens(F, Flags.apply_tokens);
+
for (auto &inp : inputs)
F.ReadDir(inp);
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 08176af..dbaf75d 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -6,41 +6,48 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// Flags. FUZZER_FLAG macro should be defined at the point of inclusion.
-// We are not using any flag parsing library for better portability and
-// independence.
+// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the
+// point of inclusion. We are not using any flag parsing library for better
+// portability and independence.
//===----------------------------------------------------------------------===//
-FUZZER_FLAG(int, verbosity, 1, "Verbosity level.")
-FUZZER_FLAG(int, seed, 0, "Random seed. If 0, seed is generated.")
-FUZZER_FLAG(int, iterations, -1,
+FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.")
+FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.")
+FUZZER_FLAG_INT(iterations, -1,
"Number of iterations of the fuzzer internal loop"
" (-1 for infinite iterations).")
-FUZZER_FLAG(int, runs, -1,
+FUZZER_FLAG_INT(runs, -1,
"Number of individual test runs (-1 for infinite runs).")
-FUZZER_FLAG(int, max_len, 64, "Maximal length of the test input.")
-FUZZER_FLAG(int, cross_over, 1, "If 1, cross over inputs.")
-FUZZER_FLAG(int, mutate_depth, 5,
+FUZZER_FLAG_INT(max_len, 64, "Maximal length of the test input.")
+FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
+FUZZER_FLAG_INT(mutate_depth, 5,
"Apply this number of consecutive mutations to each input.")
-FUZZER_FLAG(
- int, prefer_small_during_initial_shuffle, -1,
+FUZZER_FLAG_INT(
+ prefer_small_during_initial_shuffle, -1,
"If 1, always prefer smaller inputs during the initial corpus shuffle."
" If 0, never do that. If -1, do it sometimes.")
-FUZZER_FLAG(int, exit_on_first, 0,
+FUZZER_FLAG_INT(exit_on_first, 0,
"If 1, exit after the first new interesting input is found.")
-FUZZER_FLAG(int, timeout, -1, "Timeout in seconds (if positive).")
-FUZZER_FLAG(int, help, 0, "Print help.")
-FUZZER_FLAG(
- int, save_minimized_corpus, 0,
+FUZZER_FLAG_INT(timeout, -1, "Timeout in seconds (if positive).")
+FUZZER_FLAG_INT(help, 0, "Print help.")
+FUZZER_FLAG_INT(
+ save_minimized_corpus, 0,
"If 1, the minimized corpus is saved into the first input directory")
-FUZZER_FLAG(int, use_counters, 0, "Use coverage counters")
-FUZZER_FLAG(int, use_full_coverage_set, 0,
+FUZZER_FLAG_INT(use_counters, 0, "Use coverage counters")
+FUZZER_FLAG_INT(use_full_coverage_set, 0,
"Experimental: Maximize the number of different full"
" coverage sets as opposed to maximizing the total coverage."
" This is potentially MUCH slower, but may discover more paths.")
-FUZZER_FLAG(int, use_coverage_pairs, 0,
+FUZZER_FLAG_INT(use_coverage_pairs, 0,
"Experimental: Maximize the number of different coverage pairs.")
-FUZZER_FLAG(int, jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn"
+FUZZER_FLAG_INT(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn"
" this number of jobs in separate worker processes"
" with stdout/stderr redirected to fuzz-JOB.log.")
-FUZZER_FLAG(int, workers, 0,
+FUZZER_FLAG_INT(workers, 0,
"Number of simultaneous worker processes to run the jobs.")
+FUZZER_FLAG_INT(dfsan, 1, "Use DFSan for taint-guided mutations. No-op unless "
+ "the DFSan instrumentation was compiled in.")
+
+FUZZER_FLAG_STRING(tokens, "Use the file with tokens (one token per line) to"
+ " fuzz a token based input language.")
+FUZZER_FLAG_STRING(apply_tokens, "Read the given input file, substitute bytes "
+ " with tokens and write the result to stdout.")
diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp
index 224808c..ef23d42 100644
--- a/lib/Fuzzer/FuzzerIO.cpp
+++ b/lib/Fuzzer/FuzzerIO.cpp
@@ -33,6 +33,12 @@ Unit FileToVector(const std::string &Path) {
std::istreambuf_iterator<char>());
}
+// Returns the full contents of the file at Path as a string.
+std::string FileToString(const std::string &Path) {
+  std::ifstream In(Path);
+  std::istreambuf_iterator<char> First(In), Last;
+  return std::string(First, Last);
+}
+
void CopyFileToErr(const std::string &Path) {
std::ifstream T(Path);
std::copy(std::istreambuf_iterator<char>(T), std::istreambuf_iterator<char>(),
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index e4e5eb7..7787109 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -23,7 +23,8 @@ namespace fuzzer {
typedef std::vector<uint8_t> Unit;
using namespace std::chrono;
-Unit ReadFile(const char *Path);
+std::string FileToString(const std::string &Path);
+Unit FileToVector(const std::string &Path);
void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V);
void WriteToFile(const Unit &U, const std::string &Path);
void CopyFileToErr(const std::string &Path);
@@ -51,17 +52,17 @@ class Fuzzer {
bool UseCounters = false;
bool UseFullCoverageSet = false;
bool UseCoveragePairs = false;
+ bool UseDFSan = false;
int PreferSmallDuringInitialShuffle = -1;
size_t MaxNumberOfRuns = ULONG_MAX;
std::string OutputCorpus;
+ std::vector<std::string> Tokens;
};
- Fuzzer(UserCallback Callback, FuzzingOptions Options)
- : Callback(Callback), Options(Options) {
- SetDeathCallback();
- }
+ Fuzzer(UserCallback Callback, FuzzingOptions Options);
void AddToCorpus(const Unit &U) { Corpus.push_back(U); }
size_t Loop(size_t NumIterations);
void ShuffleAndMinimize();
+ void InitializeDFSan();
size_t CorpusSize() const { return Corpus.size(); }
void ReadDir(const std::string &Path) {
ReadDirToVectorOfUnits(Path.c_str(), &Corpus);
@@ -76,20 +77,28 @@ class Fuzzer {
size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; }
- static void AlarmCallback();
+ static void StaticAlarmCallback();
+
+ Unit SubstituteTokens(const Unit &U) const;
private:
+ void AlarmCallback();
+ void ExecuteCallback(const Unit &U);
size_t MutateAndTestOne(Unit *U);
size_t RunOne(const Unit &U);
size_t RunOneMaximizeTotalCoverage(const Unit &U);
size_t RunOneMaximizeFullCoverageSet(const Unit &U);
size_t RunOneMaximizeCoveragePairs(const Unit &U);
void WriteToOutputCorpus(const Unit &U);
- static void WriteToCrash(const Unit &U, const char *Prefix);
+ void WriteToCrash(const Unit &U, const char *Prefix);
+ bool MutateWithDFSan(Unit *U);
+ void PrintStats(const char *Where, size_t Cov, const char *End = "\n");
+ void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = "");
void SetDeathCallback();
- static void DeathCallback();
- static Unit CurrentUnit;
+ static void StaticDeathCallback();
+ void DeathCallback();
+ Unit CurrentUnit;
size_t TotalNumberOfRuns = 0;
@@ -108,7 +117,8 @@ class Fuzzer {
UserCallback Callback;
FuzzingOptions Options;
system_clock::time_point ProcessStartTime = system_clock::now();
- static system_clock::time_point UnitStartTime;
+ system_clock::time_point UnitStartTime;
+ long TimeOfLongestUnitInSeconds = 0;
};
}; // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 563fbf4..9dfe30b 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -16,21 +16,49 @@
namespace fuzzer {
-// static
-Unit Fuzzer::CurrentUnit;
-system_clock::time_point Fuzzer::UnitStartTime;
+// Only one Fuzzer per process.
+static Fuzzer *F;
+
+// Stores the callback and options, registers the sanitizer death callback,
+// sets up DFSan support, and publishes this object as the process-wide
+// instance F that the static callbacks forward to. Asserts that no other
+// Fuzzer has been constructed before.
+Fuzzer::Fuzzer(UserCallback Callback, FuzzingOptions Options)
+ : Callback(Callback), Options(Options) {
+ SetDeathCallback();
+ InitializeDFSan();
+ assert(!F);
+ F = this;
+}
void Fuzzer::SetDeathCallback() {
- __sanitizer_set_death_callback(DeathCallback);
+ __sanitizer_set_death_callback(StaticDeathCallback);
+}
+
+// Prints U to stderr followed by PrintAfter. Without configured tokens the
+// unit goes through PrintASCII; otherwise its token expansion is printed as a
+// NUL-terminated string.
+void Fuzzer::PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter) {
+  if (Options.Tokens.empty()) {
+    PrintASCII(U, PrintAfter);
+    return;
+  }
+  Unit Expanded = SubstituteTokens(U);
+  Expanded.push_back(0);
+  std::cerr << Expanded.data() << PrintAfter;
+}
+
+// Static trampoline registered by SetDeathCallback(): forwards to
+// DeathCallback() on the process-wide Fuzzer instance F, which must be set.
+void Fuzzer::StaticDeathCallback() {
+ assert(F);
+ F->DeathCallback();
+}
void Fuzzer::DeathCallback() {
std::cerr << "DEATH: " << std::endl;
Print(CurrentUnit, "\n");
- PrintASCII(CurrentUnit, "\n");
+ PrintUnitInASCIIOrTokens(CurrentUnit, "\n");
WriteToCrash(CurrentUnit, "crash-");
}
+// Static trampoline: forwards to AlarmCallback() on the process-wide Fuzzer
+// instance F, which must be set.
+void Fuzzer::StaticAlarmCallback() {
+ assert(F);
+ F->AlarmCallback();
+}
+
void Fuzzer::AlarmCallback() {
size_t Seconds =
duration_cast<seconds>(system_clock::now() - UnitStartTime).count();
@@ -38,27 +66,40 @@ void Fuzzer::AlarmCallback() {
<< std::endl;
if (Seconds >= 3) {
Print(CurrentUnit, "\n");
- PrintASCII(CurrentUnit, "\n");
+ PrintUnitInASCIIOrTokens(CurrentUnit, "\n");
WriteToCrash(CurrentUnit, "timeout-");
}
exit(1);
}
+// Writes one status line to stderr: run count, the Where tag, coverage Cov,
+// total bits, corpus size and executions per second, terminated by End.
+// Prints nothing when Options.Verbosity is zero.
+void Fuzzer::PrintStats(const char *Where, size_t Cov, const char *End) {
+  if (!Options.Verbosity) return;
+  size_t Elapsed = secondsSinceProcessStartUp();
+  // Report 0 exec/s until at least one full second has elapsed.
+  size_t ExecPerSec = 0;
+  if (Elapsed)
+    ExecPerSec = TotalNumberOfRuns / Elapsed;
+  std::cerr << "#" << TotalNumberOfRuns << "\t" << Where;
+  std::cerr << " cov " << Cov << " bits " << TotalBits();
+  std::cerr << " units " << Corpus.size() << " exec/s " << ExecPerSec << End;
+}
+
void Fuzzer::ShuffleAndMinimize() {
+ size_t MaxCov = 0;
bool PreferSmall =
(Options.PreferSmallDuringInitialShuffle == 1 ||
(Options.PreferSmallDuringInitialShuffle == -1 && rand() % 2));
if (Options.Verbosity)
- std::cerr << "Shuffle: Size: " << Corpus.size()
- << " prefer small: " << PreferSmall
- << "\n";
+ std::cerr << "PreferSmall: " << PreferSmall << "\n";
+ PrintStats("READ ", 0);
std::vector<Unit> NewCorpus;
std::random_shuffle(Corpus.begin(), Corpus.end());
if (PreferSmall)
std::stable_sort(
Corpus.begin(), Corpus.end(),
[](const Unit &A, const Unit &B) { return A.size() < B.size(); });
- size_t MaxCov = 0;
Unit &U = CurrentUnit;
for (const auto &C : Corpus) {
for (size_t First = 0; First < 1; First++) {
@@ -77,18 +118,29 @@ void Fuzzer::ShuffleAndMinimize() {
}
}
Corpus = NewCorpus;
- if (Options.Verbosity)
- std::cerr << "Shuffle done: " << Corpus.size() << " IC: " << MaxCov << "\n";
+ PrintStats("INITED", MaxCov);
}
size_t Fuzzer::RunOne(const Unit &U) {
UnitStartTime = system_clock::now();
TotalNumberOfRuns++;
+ size_t Res = 0;
if (Options.UseFullCoverageSet)
- return RunOneMaximizeFullCoverageSet(U);
- if (Options.UseCoveragePairs)
- return RunOneMaximizeCoveragePairs(U);
- return RunOneMaximizeTotalCoverage(U);
+ Res = RunOneMaximizeFullCoverageSet(U);
+ else if (Options.UseCoveragePairs)
+ Res = RunOneMaximizeCoveragePairs(U);
+ else
+ Res = RunOneMaximizeTotalCoverage(U);
+ auto UnitStopTime = system_clock::now();
+ auto TimeOfUnit =
+ duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
+ if (TimeOfUnit > TimeOfLongestUnitInSeconds) {
+ TimeOfLongestUnitInSeconds = TimeOfUnit;
+ std::cerr << "Longest unit: " << TimeOfLongestUnitInSeconds
+ << " s:\n";
+ Print(U, "\n");
+ }
+ return Res;
}
static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) {
@@ -99,12 +151,35 @@ static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) {
return Res;
}
+// Expands U into a new unit by treating each byte as an index into
+// Options.Tokens and concatenating the selected tokens. A byte with no
+// corresponding token contributes a single space.
+Unit Fuzzer::SubstituteTokens(const Unit &U) const {
+  Unit Expanded;
+  const size_t NumTokens = Options.Tokens.size();
+  for (auto Byte : U) {
+    if (Byte >= NumTokens) {
+      Expanded.push_back(' ');
+      continue;
+    }
+    const std::string &Token = Options.Tokens[Byte];
+    Expanded.insert(Expanded.end(), Token.begin(), Token.end());
+  }
+  // FIXME: Apply DFSan labels.
+  return Expanded;
+}
+
+// Runs the user callback on U. When tokens are configured the callback
+// receives the token expansion of U instead of U's raw bytes.
+void Fuzzer::ExecuteCallback(const Unit &U) {
+  if (Options.Tokens.empty()) {
+    Callback(U.data(), U.size());
+    return;
+  }
+  const Unit Expanded = SubstituteTokens(U);
+  Callback(Expanded.data(), Expanded.size());
+}
+
// Experimental. Does not yet scale.
// Fully reset the current coverage state, run a single unit,
// collect all coverage pairs and return non-zero if a new pair is observed.
size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) {
__sanitizer_reset_coverage();
- Callback(U.data(), U.size());
+ ExecuteCallback(U);
uintptr_t *PCs;
uintptr_t NumPCs = __sanitizer_get_coverage_guards(&PCs);
bool HasNewPairs = false;
@@ -129,7 +204,7 @@ size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) {
// e.g. test/FullCoverageSetTest.cpp. FIXME: make it scale.
size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) {
__sanitizer_reset_coverage();
- Callback(U.data(), U.size());
+ ExecuteCallback(U);
uintptr_t *PCs;
uintptr_t NumPCs =__sanitizer_get_coverage_guards(&PCs);
if (FullCoverageSets.insert(HashOfArrayOfPCs(PCs, NumPCs)).second)
@@ -144,21 +219,16 @@ size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) {
__sanitizer_update_counter_bitset_and_clear_counters(0);
}
size_t OldCoverage = __sanitizer_get_total_unique_coverage();
- Callback(U.data(), U.size());
+ ExecuteCallback(U);
size_t NewCoverage = __sanitizer_get_total_unique_coverage();
size_t NumNewBits = 0;
if (Options.UseCounters)
NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters(
CounterBitmap.data());
- if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) {
- size_t Seconds = secondsSinceProcessStartUp();
- std::cerr
- << "#" << TotalNumberOfRuns
- << "\tcov: " << NewCoverage
- << "\tbits: " << TotalBits()
- << "\texec/s: " << (Seconds ? TotalNumberOfRuns / Seconds : 0) << "\n";
- }
+ if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity)
+ PrintStats("pulse ", NewCoverage);
+
if (NewCoverage > OldCoverage || NumNewBits)
return NewCoverage;
return 0;
@@ -192,22 +262,18 @@ size_t Fuzzer::MutateAndTestOne(Unit *U) {
for (int i = 0; i < Options.MutateDepth; i++) {
if (TotalNumberOfRuns >= Options.MaxNumberOfRuns)
return NewUnits;
+ MutateWithDFSan(U);
Mutate(U, Options.MaxLen);
size_t NewCoverage = RunOne(*U);
if (NewCoverage) {
Corpus.push_back(*U);
NewUnits++;
+ PrintStats("NEW ", NewCoverage, "");
if (Options.Verbosity) {
- std::cerr << "#" << TotalNumberOfRuns
- << "\tNEW: " << NewCoverage
- << " B: " << TotalBits()
- << " L: " << U->size()
- << " S: " << Corpus.size()
- << " I: " << i
- << "\t";
+ std::cerr << " L: " << U->size();
if (U->size() < 30) {
- PrintASCII(*U);
- std::cerr << "\t";
+ std::cerr << " ";
+ PrintUnitInASCIIOrTokens(*U, "\t");
Print(*U);
}
std::cerr << "\n";
diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp
index 679f289..3635f39 100644
--- a/lib/Fuzzer/FuzzerUtil.cpp
+++ b/lib/Fuzzer/FuzzerUtil.cpp
@@ -19,15 +19,18 @@
namespace fuzzer {
void Print(const Unit &v, const char *PrintAfter) {
- std::cerr << v.size() << ": ";
for (auto x : v)
- std::cerr << (unsigned) x << " ";
+ std::cerr << "0x" << std::hex << (unsigned) x << std::dec << ",";
std::cerr << PrintAfter;
}
void PrintASCII(const Unit &U, const char *PrintAfter) {
- for (auto X : U)
- std::cerr << (char)((isascii(X) && X >= ' ') ? X : '?');
+ for (auto X : U) {
+ if (isprint(X))
+ std::cerr << X;
+ else
+ std::cerr << "\\x" << std::hex << (int)(unsigned)X << std::dec;
+ }
std::cerr << PrintAfter;
}
@@ -43,7 +46,7 @@ std::string Hash(const Unit &in) {
}
static void AlarmHandler(int, siginfo_t *, void *) {
- Fuzzer::AlarmCallback();
+ Fuzzer::StaticAlarmCallback();
}
void SetTimer(int Seconds) {
diff --git a/lib/Fuzzer/README.txt b/lib/Fuzzer/README.txt
index e4d6b4f..79f49b5 100644
--- a/lib/Fuzzer/README.txt
+++ b/lib/Fuzzer/README.txt
@@ -1,112 +1,2 @@
-===============================
-Fuzzer -- a library for coverage-guided fuzz testing.
-===============================
+Moved to http://llvm.org/docs/LibFuzzer.html
-This library is intended primarily for in-process coverage-guided fuzz testing
-(fuzzing) of other libraries. The typical workflow looks like this:
-
- * Build the Fuzzer library as a static archive (or just a set of .o files).
- Note that the Fuzzer contains the main() function.
- Preferably do *not* use sanitizers while building the Fuzzer.
- * Build the library you are going to test with -fsanitize-coverage=[234]
- and one of the sanitizers. We recommend to build the library in several
- different modes (e.g. asan, msan, lsan, ubsan, etc) and even using different
- optimizations options (e.g. -O0, -O1, -O2) to diversify testing.
- * Build a test driver using the same options as the library.
- The test driver is a C/C++ file containing interesting calls to the library
- inside a single function:
- extern "C" void TestOneInput(const uint8_t *Data, size_t Size);
- * Link the Fuzzer, the library and the driver together into an executable
- using the same sanitizer options as for the library.
- * Collect the initial corpus of inputs for the
- fuzzer (a directory with test inputs, one file per input).
- The better your inputs are the faster you will find something interesting.
- Also try to keep your inputs small, otherwise the Fuzzer will run too slow.
- * Run the fuzzer with the test corpus. As new interesting test cases are
- discovered they will be added to the corpus. If a bug is discovered by
- the sanitizer (asan, etc) it will be reported as usual and the reproducer
- will be written to disk.
- Each Fuzzer process is single-threaded (unless the library starts its own
- threads). You can run the Fuzzer on the same corpus in multiple processes.
- in parallel. For run-time options run the Fuzzer binary with '-help=1'.
-
-
-The Fuzzer is similar in concept to AFL (http://lcamtuf.coredump.cx/afl/),
-but uses in-process Fuzzing, which is more fragile, more restrictive, but
-potentially much faster as it has no overhead for process start-up.
-It uses LLVM's "Sanitizer Coverage" instrumentation to get in-process
-coverage-feedback https://code.google.com/p/address-sanitizer/wiki/AsanCoverage
-
-The code resides in the LLVM repository and is (or will be) used by various
-parts of LLVM, but the Fuzzer itself does not (and should not) depend on any
-part of LLVM and can be used for other projects. Ideally, the Fuzzer's code
-should not have any external dependencies. Right now it uses STL, which may need
-to be fixed later. See also F.A.Q. below.
-
-Examples of usage in LLVM:
- * clang-format-fuzzer. The inputs are random pieces of C++-like text.
- * Build (make sure to use fresh clang as the host compiler):
- cmake -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
- -DLLVM_USE_SANITIZER=Address -DLLVM_USE_SANITIZE_COVERAGE=YES \
- /path/to/llvm -DCMAKE_BUILD_TYPE=Release
- ninja clang-format-fuzzer
- * Optionally build other kinds of binaries (asan+Debug, msan, ubsan, etc)
- * TODO: commit the pre-fuzzed corpus to svn (?).
- * Run:
- clang-format-fuzzer CORPUS_DIR
-
-Toy example (see SimpleTest.cpp):
-a simple function that does something interesting if it receives bytes "Hi!".
- # Build the Fuzzer with asan:
- % clang++ -std=c++11 -fsanitize=address -fsanitize-coverage=3 -O1 -g \
- Fuzzer*.cpp test/SimpleTest.cpp
- # Run the fuzzer with no corpus (assuming on empty input)
- % ./a.out
-
-===============================================================================
-F.A.Q.
-
-Q. Why Fuzzer does not use any of the LLVM support?
-A. There are two reasons.
-First, we want this library to be used outside of the LLVM w/o users having to
-build the rest of LLVM. This may sound unconvincing for many LLVM folks,
-but in practice the need for building the whole LLVM frightens many potential
-users -- and we want more users to use this code.
-Second, there is a subtle technical reason not to rely on the rest of LLVM, or
-any other large body of code (maybe not even STL). When coverage instrumentation
-is enabled, it will also instrument the LLVM support code which will blow up the
-coverage set of the process (since the fuzzer is in-process). In other words, by
-using more external dependencies we will slow down the fuzzer while the main
-reason for it to exist is extreme speed.
-
-Q. What about Windows then? The Fuzzer contains code that does not build on
-Windows.
-A. The sanitizer coverage support does not work on Windows either as of 01/2015.
-Once it's there, we'll need to re-implement OS-specific parts (I/O, signals).
-
-Q. When this Fuzzer is not a good solution for a problem?
-A.
- * If the test inputs are validated by the target library and the validator
- asserts/crashes on invalid inputs, the in-process fuzzer is not applicable
- (we could use fork() w/o exec, but it comes with extra overhead).
- * Bugs in the target library may accumulate w/o being detected. E.g. a memory
- corruption that goes undetected at first and then leads to a crash while
- testing another input. This is why it is highly recommended to run this
- in-process fuzzer with all sanitizers to detect most bugs on the spot.
- * It is harder to protect the in-process fuzzer from excessive memory
- consumption and infinite loops in the target library (still possible).
- * The target library should not have significant global state that is not
- reset between the runs.
- * Many interesting target libs are not designed in a way that supports
- the in-process fuzzer interface (e.g. require a file path instead of a
- byte array).
- * If a single test run takes a considerable fraction of a second (or
- more) the speed benefit from the in-process fuzzer is negligible.
- * If the target library runs persistent threads (that outlive
- execution of one test) the fuzzing results will be unreliable.
-
-Q. So, what exactly this Fuzzer is good for?
-A. This Fuzzer might be a good choice for testing libraries that have relatively
-small inputs, each input takes < 1ms to run, and the library code is not expected
-to crash on invalid inputs.
-Examples: regular expression matchers, text or binary format parsers.
diff --git a/lib/Fuzzer/cxx_fuzzer_tokens.txt b/lib/Fuzzer/cxx_fuzzer_tokens.txt
new file mode 100644
index 0000000..f3c4f80
--- /dev/null
+++ b/lib/Fuzzer/cxx_fuzzer_tokens.txt
@@ -0,0 +1,218 @@
+#
+##
+`
+~
+!
+@
+$
+%
+^
+&
+*
+(
+)
+_
+-
+_
+=
++
+{
+}
+[
+]
+|
+\
+,
+.
+/
+?
+>
+<
+;
+:
+'
+"
+++
+--
+<<
+>>
++=
+-=
+*=
+/=
+>>=
+<<=
+&=
+|=
+^=
+%=
+!=
+&&
+||
+==
+>=
+<=
+->
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+alignas
+alignof
+and
+and_eq
+asm
+auto
+bitand
+bitor
+bool
+break
+case
+catch
+char
+char16_t
+char32_t
+class
+compl
+concept
+const
+constexpr
+const_cast
+continue
+decltype
+default
+delete
+do
+double
+dynamic_cast
+else
+enum
+explicit
+export
+extern
+false
+float
+for
+friend
+goto
+if
+inline
+int
+long
+mutable
+namespace
+new
+noexcept
+not
+not_eq
+nullptr
+operator
+or
+or_eq
+private
+protected
+public
+register
+reinterpret_cast
+requires
+return
+short
+signed
+sizeof
+static
+static_assert
+static_cast
+struct
+switch
+template
+this
+thread_local
+throw
+true
+try
+typedef
+typeid
+typename
+union
+unsigned
+using
+virtual
+void
+volatile
+wchar_t
+while
+xor
+xor_eq
+if
+elif
+else
+endif
+defined
+ifdef
+ifndef
+define
+undef
+include
+line
+error
+pragma
+override
+final
diff --git a/lib/Fuzzer/dfsan_fuzzer_abi.list b/lib/Fuzzer/dfsan_fuzzer_abi.list
new file mode 100644
index 0000000..7da7522
--- /dev/null
+++ b/lib/Fuzzer/dfsan_fuzzer_abi.list
@@ -0,0 +1,12 @@
+# Replaces __sanitizer_cov_trace_cmp with __dfsw___sanitizer_cov_trace_cmp
+fun:__sanitizer_cov_trace_cmp=custom
+fun:__sanitizer_cov_trace_cmp=uninstrumented
+
+# Ignores coverage callbacks.
+fun:__sanitizer_cov=uninstrumented
+fun:__sanitizer_cov=discard
+fun:__sanitizer_cov_module_init=uninstrumented
+fun:__sanitizer_cov_module_init=discard
+
+# Don't add extra parameters to the Fuzzer callback.
+fun:TestOneInput=uninstrumented
diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt
index 08130c6..fb3bf20 100644
--- a/lib/Fuzzer/test/CMakeLists.txt
+++ b/lib/Fuzzer/test/CMakeLists.txt
@@ -2,10 +2,11 @@
# basic blocks and we'll fail to discover the targets.
# Also enable the coverage instrumentation back (it is disabled
# for the Fuzzer lib)
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0 -fsanitize-coverage=4")
+set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O0 -fsanitize-coverage=4")
set(Tests
CounterTest
+ CxxTokensTest
FourIndependentBranchesTest
FullCoverageSetTest
InfiniteTest
@@ -14,11 +15,14 @@ set(Tests
TimeoutTest
)
+set(DFSanTests
+ DFSanSimpleCmpTest
+ )
+
set(TestBinaries)
foreach(Test ${Tests})
add_executable(LLVMFuzzer-${Test}
- EXCLUDE_FROM_ALL
${Test}.cpp
)
target_link_libraries(LLVMFuzzer-${Test}
@@ -52,6 +56,13 @@ target_link_libraries(LLVMFuzzer-Unittest
set(TestBinaries ${TestBinaries} LLVMFuzzer-Unittest)
+add_subdirectory(dfsan)
+
+foreach(Test ${DFSanTests})  # the targets themselves are created in the dfsan subdirectory
+  list(APPEND TestBinaries LLVMFuzzer-${Test})
+endforeach()
+
+
set_target_properties(${TestBinaries}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
diff --git a/lib/Fuzzer/test/CxxTokensTest.cpp b/lib/Fuzzer/test/CxxTokensTest.cpp
new file mode 100644
index 0000000..1addccb
--- /dev/null
+++ b/lib/Fuzzer/test/CxxTokensTest.cpp
@@ -0,0 +1,24 @@
+// Simple test for a fuzzer. The fuzzer must find a sequence of C++ tokens.
+#include <cstdint>
+#include <cstdlib>
+#include <cstddef>
+#include <cstring>
+#include <iostream>
+
+static void Found() {  // Reports that the target input was reached, then ends the process.
+ std::cout << "Found the target, exiting\n";
+ exit(1);  // Non-zero exit status; this call does not return.
+}
+
+extern "C" void TestOneInput(const uint8_t *Data, size_t Size) {
+ // Calls Found() only if Data begins with the 24 bytes "thread_local unsigned A;".
+ if (Size < 24) return;  // The checks below read Data[0] through Data[23].
+ if (0 == memcmp(&Data[0], "thread_local", 12))  // bytes 0..11
+ if (Data[12] == ' ')
+ if (0 == memcmp(&Data[13], "unsigned", 8))  // bytes 13..20
+ if (Data[21] == ' ')
+ if (Data[22] == 'A')
+ if (Data[23] == ';')
+ Found();  // Reached only when every comparison above matched.
+}
+
diff --git a/lib/Fuzzer/test/dfsan/CMakeLists.txt b/lib/Fuzzer/test/dfsan/CMakeLists.txt
new file mode 100644
index 0000000..b5b874f
--- /dev/null
+++ b/lib/Fuzzer/test/dfsan/CMakeLists.txt
@@ -0,0 +1,17 @@
+# DFSan fuzzer tests: built with both dataflow and coverage instrumentation.
+
+set(DFSAN_FUZZER_ABI_LIST "${CMAKE_CURRENT_SOURCE_DIR}/../../dfsan_fuzzer_abi.list")
+# Start from the saved base flags, turn every sanitizer off, then enable DFSan,
+# compare tracing and the ABI list above for everything compiled from here on.
+set(CMAKE_CXX_FLAGS_RELEASE
+  "${LIBFUZZER_FLAGS_BASE} -O0 -fno-sanitize=all -fsanitize=dataflow -mllvm -sanitizer-coverage-experimental-trace-compares=1 -fsanitize-blacklist=${DFSAN_FUZZER_ABI_LIST}")
+
+# DFSanTests is set by the parent directory before it adds this one.
+foreach(Test ${DFSanTests})
+  # Recompile the test source whenever the ABI list changes.
+  set_source_files_properties(${Test}.cpp PROPERTIES OBJECT_DEPENDS ${DFSAN_FUZZER_ABI_LIST})
+  # One executable per test, named LLVMFuzzer-<Test>.
+  add_executable(LLVMFuzzer-${Test} ${Test}.cpp)
+  target_link_libraries(LLVMFuzzer-${Test} LLVMFuzzer)
+endforeach()
+
diff --git a/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp
new file mode 100644
index 0000000..1162092
--- /dev/null
+++ b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp
@@ -0,0 +1,30 @@
+// Simple test for a fuzzer. The fuzzer must find several narrow ranges.
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+
+extern "C" void TestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 14) return;  // Too short; the reads below assume at least 14 bytes.
+  uint64_t x = 0;        // first 8 bytes of the input
+  int64_t y = 0;         // last 8 bytes of the input
+  int z = 0;             // sizeof(z) bytes starting at the midpoint
+  unsigned short a = 0;  // sizeof(a) bytes starting 4 bytes past the midpoint
+  memcpy(&x, Data, 8);
+  memcpy(&y, Data + Size - 8, 8);
+  memcpy(&z, Data + Size / 2, sizeof(z));
+  memcpy(&a, Data + Size / 2 + 4, sizeof(a));
+  // The target is hit only when all four values fall in their narrow ranges.
+  if (x > 1234567890 &&
+      x < 1234567895 &&
+      y >= 987654321 &&
+      y <= 987654325 &&
+      z < -10000 &&
+      z >= -10005 &&
+      z != -10003 &&
+      a == 4242) {
+    fprintf(stderr, "Found the target: size %zu (%llu, %lld, %d, %d), exiting.\n",
+            Size, (unsigned long long)x, (long long)y, z, a);  // specifiers now match argument types
+    exit(1);
+  }
+}
diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test
index 45691f5..2a0e95f 100644
--- a/lib/Fuzzer/test/fuzzer.test
+++ b/lib/Fuzzer/test/fuzzer.test
@@ -20,3 +20,9 @@ FourIndependentBranchesTest: BINGO
RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=CounterTest
CounterTest: BINGO
+
+RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=DFSanSimpleCmpTest
+DFSanSimpleCmpTest: Found the target:
+
+RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s --check-prefix=CxxTokensTest
+CxxTokensTest: Found the target, exiting