diff options
author | Shih-wei Liao <sliao@google.com> | 2012-04-24 11:26:46 -0700 |
---|---|---|
committer | Shih-wei Liao <sliao@google.com> | 2012-04-24 11:26:46 -0700 |
commit | cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d (patch) | |
tree | 557137810ae9efc96147d672d372e4dabd0a2440 /lib/Support | |
parent | 4c8fab82874a29dcd2b242533af3ebe7f66bfd74 (diff) | |
parent | fc728fbdc2631ce8f343cf9b7292d218fde7419f (diff) | |
download | external_llvm-cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d.zip external_llvm-cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d.tar.gz external_llvm-cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d.tar.bz2 |
Merge with LLVM upstream r155090.
Conflicts:
lib/Support/Unix/PathV2.inc
Change-Id: I7b89833849f6cbcfa958a33a971d0f7754c9cb2c
Diffstat (limited to 'lib/Support')
-rw-r--r-- | lib/Support/CMakeLists.txt | 3 | ||||
-rw-r--r-- | lib/Support/Dwarf.cpp | 2 | ||||
-rw-r--r-- | lib/Support/FoldingSet.cpp | 6 | ||||
-rw-r--r-- | lib/Support/JSONParser.cpp | 302 | ||||
-rw-r--r-- | lib/Support/Locale.cpp | 10 | ||||
-rw-r--r-- | lib/Support/LocaleGeneric.inc | 17 | ||||
-rw-r--r-- | lib/Support/LocaleWindows.inc | 15 | ||||
-rw-r--r-- | lib/Support/LocaleXlocale.inc | 61 | ||||
-rw-r--r-- | lib/Support/MemoryBuffer.cpp | 11 | ||||
-rw-r--r-- | lib/Support/PathV2.cpp | 11 | ||||
-rw-r--r-- | lib/Support/SmallPtrSet.cpp | 3 | ||||
-rw-r--r-- | lib/Support/SourceMgr.cpp | 48 | ||||
-rw-r--r-- | lib/Support/Triple.cpp | 6 | ||||
-rw-r--r-- | lib/Support/Unix/Path.inc | 11 | ||||
-rw-r--r-- | lib/Support/Unix/PathV2.inc | 10 | ||||
-rw-r--r-- | lib/Support/Unix/Process.inc | 8 | ||||
-rw-r--r-- | lib/Support/Unix/Signals.inc | 4 | ||||
-rw-r--r-- | lib/Support/Windows/Process.inc | 32 | ||||
-rw-r--r-- | lib/Support/YAMLParser.cpp | 2117 | ||||
-rw-r--r-- | lib/Support/raw_ostream.cpp | 13 |
20 files changed, 2363 insertions, 327 deletions
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 0b69238..9103327 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -32,7 +32,7 @@ add_llvm_library(LLVMSupport IntrusiveRefCntPtr.cpp IsInf.cpp IsNAN.cpp - JSONParser.cpp + Locale.cpp LockFileManager.cpp ManagedStatic.cpp MemoryBuffer.cpp @@ -54,6 +54,7 @@ add_llvm_library(LLVMSupport ToolOutputFile.cpp Triple.cpp Twine.cpp + YAMLParser.cpp raw_os_ostream.cpp raw_ostream.cpp regcomp.c diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index b317e49..5c59a3e 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -95,7 +95,7 @@ const char *llvm::dwarf::TagString(unsigned Tag) { return "DW_TAG_GNU_template_parameter_pack"; case DW_TAG_GNU_formal_parameter_pack: return "DW_TAG_GNU_formal_parameter_pack"; - case DW_TAG_APPLE_Property: return "DW_TAG_APPLE_property"; + case DW_TAG_APPLE_property: return "DW_TAG_APPLE_property"; } return 0; } diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index e029970..c6282c6 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -265,15 +265,15 @@ void FoldingSetImpl::GrowHashTable() { FoldingSetImpl::Node *FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { - - void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets); + unsigned IDHash = ID.ComputeHash(); + void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); void *Probe = *Bucket; InsertPos = 0; FoldingSetNodeID TempID; while (Node *NodeInBucket = GetNextPtr(Probe)) { - if (NodeEquals(NodeInBucket, ID, TempID)) + if (NodeEquals(NodeInBucket, ID, IDHash, TempID)) return NodeInBucket; TempID.clear(); diff --git a/lib/Support/JSONParser.cpp b/lib/Support/JSONParser.cpp deleted file mode 100644 index 5dfcf29..0000000 --- a/lib/Support/JSONParser.cpp +++ /dev/null @@ -1,302 +0,0 @@ -//===--- JSONParser.cpp - Simple JSON parser ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a JSON parser. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/JSONParser.h" - -#include "llvm/ADT/Twine.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/MemoryBuffer.h" - -using namespace llvm; - -JSONParser::JSONParser(StringRef Input, SourceMgr *SM) - : SM(SM), Failed(false) { - InputBuffer = MemoryBuffer::getMemBuffer(Input, "JSON"); - SM->AddNewSourceBuffer(InputBuffer, SMLoc()); - End = InputBuffer->getBuffer().end(); - Position = InputBuffer->getBuffer().begin(); -} - -JSONValue *JSONParser::parseRoot() { - if (Position != InputBuffer->getBuffer().begin()) - report_fatal_error("Cannot reuse JSONParser."); - if (isWhitespace()) - nextNonWhitespace(); - if (errorIfAtEndOfFile("'[' or '{' at start of JSON text")) - return 0; - switch (*Position) { - case '[': - return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this); - case '{': - return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this); - default: - setExpectedError("'[' or '{' at start of JSON text", *Position); - return 0; - } -} - -bool JSONParser::validate() { - JSONValue *Root = parseRoot(); - if (Root == NULL) { - return false; - } - return skip(*Root); -} - -bool JSONParser::skip(const JSONAtom &Atom) { - switch(Atom.getKind()) { - case JSONAtom::JK_Array: - case JSONAtom::JK_Object: - return skipContainer(*cast<JSONContainer>(&Atom)); - case JSONAtom::JK_String: - return true; - case JSONAtom::JK_KeyValuePair: - return skip(*cast<JSONKeyValuePair>(&Atom)->Value); - } - llvm_unreachable("Impossible enum value."); -} - -// Sets the current error to: -// "expected <Expected>, but found <Found>". -void JSONParser::setExpectedError(StringRef Expected, StringRef Found) { - SM->PrintMessage(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, - "expected " + Expected + ", but found " + Found + ".", ArrayRef<SMRange>()); - Failed = true; -} - -// Sets the current error to: -// "expected <Expected>, but found <Found>". -void JSONParser::setExpectedError(StringRef Expected, char Found) { - setExpectedError(Expected, ("'" + StringRef(&Found, 1) + "'").str()); -} - -// If there is no character available, returns true and sets the current error -// to: "expected <Expected>, but found EOF.". -bool JSONParser::errorIfAtEndOfFile(StringRef Expected) { - if (Position == End) { - setExpectedError(Expected, "EOF"); - return true; - } - return false; -} - -// Sets the current error if the current character is not C to: -// "expected 'C', but got <current character>". -bool JSONParser::errorIfNotAt(char C, StringRef Message) { - if (*Position != C) { - std::string Expected = - ("'" + StringRef(&C, 1) + "' " + Message).str(); - if (Position == End) - setExpectedError(Expected, "EOF"); - else - setExpectedError(Expected, *Position); - return true; - } - return false; -} - -// Forbidding inlining improves performance by roughly 20%. -// FIXME: Remove once llvm optimizes this to the faster version without hints. -LLVM_ATTRIBUTE_NOINLINE static bool -wasEscaped(StringRef::iterator First, StringRef::iterator Position); - -// Returns whether a character at 'Position' was escaped with a leading '\'. -// 'First' specifies the position of the first character in the string. -static bool wasEscaped(StringRef::iterator First, - StringRef::iterator Position) { - assert(Position - 1 >= First); - StringRef::iterator I = Position - 1; - // We calulate the number of consecutive '\'s before the current position - // by iterating backwards through our string. - while (I >= First && *I == '\\') --I; - // (Position - 1 - I) now contains the number of '\'s before the current - // position. If it is odd, the character at 'Positon' was escaped. - return (Position - 1 - I) % 2 == 1; -} - -// Parses a JSONString, assuming that the current position is on a quote. -JSONString *JSONParser::parseString() { - assert(Position != End); - assert(!isWhitespace()); - if (errorIfNotAt('"', "at start of string")) - return 0; - StringRef::iterator First = Position + 1; - - // Benchmarking shows that this loop is the hot path of the application with - // about 2/3rd of the runtime cycles. Since escaped quotes are not the common - // case, and multiple escaped backslashes before escaped quotes are very rare, - // we pessimize this case to achieve a smaller inner loop in the common case. - // We're doing that by having a quick inner loop that just scans for the next - // quote. Once we find the quote we check the last character to see whether - // the quote might have been escaped. If the last character is not a '\', we - // know the quote was not escaped and have thus found the end of the string. - // If the immediately preceding character was a '\', we have to scan backwards - // to see whether the previous character was actually an escaped backslash, or - // an escape character for the quote. If we find that the current quote was - // escaped, we continue parsing for the next quote and repeat. - // This optimization brings around 30% performance improvements. - do { - // Step over the current quote. - ++Position; - // Find the next quote. - while (Position != End && *Position != '"') - ++Position; - if (errorIfAtEndOfFile("'\"' at end of string")) - return 0; - // Repeat until the previous character was not a '\' or was an escaped - // backslash. - } while (*(Position - 1) == '\\' && wasEscaped(First, Position)); - - return new (ValueAllocator.Allocate<JSONString>()) - JSONString(StringRef(First, Position - First)); -} - - -// Advances the position to the next non-whitespace position. -void JSONParser::nextNonWhitespace() { - do { - ++Position; - } while (isWhitespace()); -} - -// Checks if there is a whitespace character at the current position. -bool JSONParser::isWhitespace() { - return *Position == ' ' || *Position == '\t' || - *Position == '\n' || *Position == '\r'; -} - -bool JSONParser::failed() const { - return Failed; -} - -// Parses a JSONValue, assuming that the current position is at the first -// character of the value. -JSONValue *JSONParser::parseValue() { - assert(Position != End); - assert(!isWhitespace()); - switch (*Position) { - case '[': - return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this); - case '{': - return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this); - case '"': - return parseString(); - default: - setExpectedError("'[', '{' or '\"' at start of value", *Position); - return 0; - } -} - -// Parses a JSONKeyValuePair, assuming that the current position is at the first -// character of the key, value pair. -JSONKeyValuePair *JSONParser::parseKeyValuePair() { - assert(Position != End); - assert(!isWhitespace()); - - JSONString *Key = parseString(); - if (Key == 0) - return 0; - - nextNonWhitespace(); - if (errorIfNotAt(':', "between key and value")) - return 0; - - nextNonWhitespace(); - const JSONValue *Value = parseValue(); - if (Value == 0) - return 0; - - return new (ValueAllocator.Allocate<JSONKeyValuePair>(1)) - JSONKeyValuePair(Key, Value); -} - -/// \brief Parses the first element of a JSON array or object, or closes the -/// array. -/// -/// The method assumes that the current position is before the first character -/// of the element, with possible white space in between. When successful, it -/// returns the new position after parsing the element. Otherwise, if there is -/// no next value, it returns a default constructed StringRef::iterator. -StringRef::iterator JSONParser::parseFirstElement(JSONAtom::Kind ContainerKind, - char StartChar, char EndChar, - const JSONAtom *&Element) { - assert(*Position == StartChar); - Element = 0; - nextNonWhitespace(); - if (errorIfAtEndOfFile("value or end of container at start of container")) - return StringRef::iterator(); - - if (*Position == EndChar) - return StringRef::iterator(); - - Element = parseElement(ContainerKind); - if (Element == 0) - return StringRef::iterator(); - - return Position; -} - -/// \brief Parses the next element of a JSON array or object, or closes the -/// array. -/// -/// The method assumes that the current position is before the ',' which -/// separates the next element from the current element. When successful, it -/// returns the new position after parsing the element. Otherwise, if there is -/// no next value, it returns a default constructed StringRef::iterator. -StringRef::iterator JSONParser::parseNextElement(JSONAtom::Kind ContainerKind, - char EndChar, - const JSONAtom *&Element) { - Element = 0; - nextNonWhitespace(); - if (errorIfAtEndOfFile("',' or end of container for next element")) - return 0; - - if (*Position == ',') { - nextNonWhitespace(); - if (errorIfAtEndOfFile("element in container")) - return StringRef::iterator(); - - Element = parseElement(ContainerKind); - if (Element == 0) - return StringRef::iterator(); - - return Position; - } else if (*Position == EndChar) { - return StringRef::iterator(); - } else { - setExpectedError("',' or end of container for next element", *Position); - return StringRef::iterator(); - } -} - -const JSONAtom *JSONParser::parseElement(JSONAtom::Kind ContainerKind) { - switch (ContainerKind) { - case JSONAtom::JK_Array: - return parseValue(); - case JSONAtom::JK_Object: - return parseKeyValuePair(); - default: - llvm_unreachable("Impossible code path"); - } -} - -bool JSONParser::skipContainer(const JSONContainer &Container) { - for (JSONContainer::AtomIterator I = Container.atom_current(), - E = Container.atom_end(); - I != E; ++I) { - assert(*I != 0); - if (!skip(**I)) - return false; - } - return !failed(); -} diff --git a/lib/Support/Locale.cpp b/lib/Support/Locale.cpp new file mode 100644 index 0000000..17b9b6c --- /dev/null +++ b/lib/Support/Locale.cpp @@ -0,0 +1,10 @@ +#include "llvm/Support/Locale.h" +#include "llvm/Config/config.h" + +#ifdef __APPLE__ +#include "LocaleXlocale.inc" +#elif LLVM_ON_WIN32 +#include "LocaleWindows.inc" +#else +#include "LocaleGeneric.inc" +#endif diff --git a/lib/Support/LocaleGeneric.inc b/lib/Support/LocaleGeneric.inc new file mode 100644 index 0000000..278deee --- /dev/null +++ b/lib/Support/LocaleGeneric.inc @@ -0,0 +1,17 @@ +#include <cwctype> + +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef s) { + return s.size(); +} + +bool isPrint(int c) { + return iswprint(c); +} + +} +} +} diff --git a/lib/Support/LocaleWindows.inc b/lib/Support/LocaleWindows.inc new file mode 100644 index 0000000..6827ac1 --- /dev/null +++ b/lib/Support/LocaleWindows.inc @@ -0,0 +1,15 @@ +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef s) { + return s.size(); +} + +bool isPrint(int c) { + return ' ' <= c && c <= '~'; +} + +} +} +}
\ No newline at end of file diff --git a/lib/Support/LocaleXlocale.inc b/lib/Support/LocaleXlocale.inc new file mode 100644 index 0000000..f595e7c --- /dev/null +++ b/lib/Support/LocaleXlocale.inc @@ -0,0 +1,61 @@ +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/ManagedStatic.h" +#include <cassert> +#include <xlocale.h> + + +namespace { + struct locale_holder { + locale_holder() + : l(newlocale(LC_CTYPE_MASK,"en_US.UTF-8",LC_GLOBAL_LOCALE)) + { + assert(NULL!=l); + } + ~locale_holder() { + freelocale(l); + } + + int mbswidth(llvm::SmallString<16> s) const { + // this implementation assumes no '\0' in s + assert(s.size()==strlen(s.c_str())); + + size_t size = mbstowcs_l(NULL,s.c_str(),0,l); + assert(size!=(size_t)-1); + if (size==0) + return 0; + llvm::SmallVector<wchar_t,200> ws(size); + size = mbstowcs_l(&ws[0],s.c_str(),ws.size(),l); + assert(ws.size()==size); + return wcswidth_l(&ws[0],ws.size(),l); + } + + int isprint(int c) const { + return iswprint_l(c,l); + } + + private: + + locale_t l; + }; + + llvm::ManagedStatic<locale_holder> l; +} + +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef s) { + int width = l->mbswidth(s); + assert(width>=0); + return width; +} + +bool isPrint(int c) { + return l->isprint(c); +} + +} +} +} diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 911a03f..16e5c7a 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -304,6 +304,16 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, RealMapOffset)) { result.reset(GetNamedBuffer<MemoryBufferMMapFile>( StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator)); + + if (RequiresNullTerminator && result->getBufferEnd()[0] != '\0') { + // There could be a racing issue that resulted in the file being larger + // than the FileSize passed by the caller. We already have an assertion + // for this in MemoryBuffer::init() but have a runtime guarantee that + // the buffer will be null-terminated here, so do a copy that adds a + // null-terminator. + result.reset(MemoryBuffer::getMemBufferCopy(result->getBuffer(), + Filename)); + } return error_code::success(); } } @@ -339,6 +349,7 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, if (NumRead == 0) { assert(0 && "We got inaccurate FileSize value or fstat reported an " "invalid file size."); + *BufPtr = '\0'; // null-terminate at the actual size. break; } BytesLeft -= NumRead; diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index 786e1a1..e2a69a6 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -654,12 +654,13 @@ error_code create_directories(const Twine &path, bool &existed) { StringRef p = path.toStringRef(path_storage); StringRef parent = path::parent_path(p); - bool parent_exists; + if (!parent.empty()) { + bool parent_exists; + if (error_code ec = fs::exists(parent, parent_exists)) return ec; - if (error_code ec = fs::exists(parent, parent_exists)) return ec; - - if (!parent_exists) - if (error_code ec = create_directories(parent, existed)) return ec; + if (!parent_exists) + if (error_code ec = create_directories(parent, existed)) return ec; + } return create_directory(p, existed); } diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 68d9c29..3b53e9f 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/MathExtras.h" #include <algorithm> #include <cstdlib> @@ -102,7 +103,7 @@ bool SmallPtrSetImpl::erase_imp(const void * Ptr) { } const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const { - unsigned Bucket = Hash(Ptr); + unsigned Bucket = DenseMapInfo<void *>::getHashValue(Ptr) & (CurArraySize-1); unsigned ArraySize = CurArraySize; unsigned ProbeAmt = 1; const void *const *Array = CurArray; diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index bbe36b2..15278c5 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -193,7 +193,8 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, } void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, - const Twine &Msg, ArrayRef<SMRange> Ranges) const { + const Twine &Msg, ArrayRef<SMRange> Ranges, + bool ShowColors) const { SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges); // Report the message with the diagnostic handler if present. @@ -208,7 +209,7 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, assert(CurBuf != -1 && "Invalid or unspecified location!"); PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); - Diagnostic.print(0, OS); + Diagnostic.print(0, OS, ShowColors); } //===----------------------------------------------------------------------===// @@ -225,7 +226,14 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN, } -void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const { +void SMDiagnostic::print(const char *ProgName, raw_ostream &S, + bool ShowColors) const { + // Display colors only if OS goes to a tty. + ShowColors &= S.is_displayed(); + + if (ShowColors) + S.changeColor(raw_ostream::SAVEDCOLOR, true); + if (ProgName && ProgName[0]) S << ProgName << ": "; @@ -244,13 +252,33 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const { } switch (Kind) { - case SourceMgr::DK_Error: S << "error: "; break; - case SourceMgr::DK_Warning: S << "warning: "; break; - case SourceMgr::DK_Note: S << "note: "; break; + case SourceMgr::DK_Error: + if (ShowColors) + S.changeColor(raw_ostream::RED, true); + S << "error: "; + break; + case SourceMgr::DK_Warning: + if (ShowColors) + S.changeColor(raw_ostream::MAGENTA, true); + S << "warning: "; + break; + case SourceMgr::DK_Note: + if (ShowColors) + S.changeColor(raw_ostream::BLACK, true); + S << "note: "; + break; } - + + if (ShowColors) { + S.resetColor(); + S.changeColor(raw_ostream::SAVEDCOLOR, true); + } + S << Message << '\n'; + if (ShowColors) + S.resetColor(); + if (LineNo == -1 || ColumnNo == -1) return; @@ -292,6 +320,9 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const { } S << '\n'; + if (ShowColors) + S.changeColor(raw_ostream::GREEN, true); + // Print out the caret line, matching tabs in the source line. for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) { if (i >= LineContents.size() || LineContents[i] != '\t') { @@ -306,6 +337,9 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const { ++OutCol; } while (OutCol & 7); } + + if (ShowColors) + S.resetColor(); S << '\n'; } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index d261c53..44a1b38 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -88,6 +88,8 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case Apple: return "apple"; case PC: return "pc"; case SCEI: return "scei"; + case BGP: return "bgp"; + case BGQ: return "bgq"; } llvm_unreachable("Invalid VendorType!"); @@ -116,6 +118,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case Minix: return "minix"; case RTEMS: return "rtems"; case NativeClient: return "nacl"; + case CNK: return "cnk"; } llvm_unreachable("Invalid OSType"); @@ -258,6 +261,8 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("apple", Triple::Apple) .Case("pc", Triple::PC) .Case("scei", Triple::SCEI) + .Case("bgp", Triple::BGP) + .Case("bgq", Triple::BGQ) .Default(Triple::UnknownVendor); } @@ -282,6 +287,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("minix", Triple::Minix) .StartsWith("rtems", Triple::RTEMS) .StartsWith("nacl", Triple::NativeClient) + .StartsWith("cnk", Triple::CNK) .Default(Triple::UnknownOS); } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 418dc07..ddc1e0f 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -60,6 +60,11 @@ #include <mach-o/dyld.h> #endif +// For GNU Hurd +#if defined(__GNU__) && !defined(MAXPATHLEN) +# define MAXPATHLEN 4096 +#endif + // Put in a hack for Cygwin which falsely reports that the mkdtemp function // is available when it is not. #ifdef __CYGWIN__ @@ -256,7 +261,7 @@ Path::GetCurrentDirectory() { } #if defined(__FreeBSD__) || defined (__NetBSD__) || \ - defined(__OpenBSD__) || defined(__minix) + defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], const char *dir, const char *bin) @@ -308,7 +313,7 @@ getprogpath(char ret[PATH_MAX], const char *bin) free(pv); return (NULL); } -#endif // __FreeBSD__ || __NetBSD__ +#endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__ /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. @@ -325,7 +330,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { return Path(link_path); } #elif defined(__FreeBSD__) || defined (__NetBSD__) || \ - defined(__OpenBSD__) || defined(__minix) + defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) char exe_path[PATH_MAX]; if (getprogpath(exe_path, argv0) != NULL) diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 7d79947..7d259a3 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -46,6 +46,11 @@ #include <limits.h> #endif +// For GNU Hurd +#if defined(__GNU__) && !defined(PATH_MAX) +# define PATH_MAX 4096 +#endif + extern "C" int truncate (const char*, off_t); using namespace llvm; @@ -98,7 +103,12 @@ namespace sys { namespace fs { error_code current_path(SmallVectorImpl<char> &result) { +#ifdef MAXPATHLEN result.reserve(MAXPATHLEN); +#else +// For GNU Hurd + result.reserve(1024); +#endif while (true) { if (::getcwd(result.data(), result.capacity()) == 0) { diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 5cdb11c..f640462 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -136,7 +136,7 @@ int Process::GetCurrentGroupId() { return getgid(); } -#ifdef HAVE_MACH_MACH_H +#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) #include <mach/mach.h> #endif @@ -150,7 +150,7 @@ void Process::PreventCoreFiles() { setrlimit(RLIMIT_CORE, &rlim); #endif -#ifdef HAVE_MACH_MACH_H +#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) // Disable crash reporting on Mac OS X 10.0-10.4 // get information about the original set of exception ports for the task @@ -290,6 +290,10 @@ const char *Process::OutputBold(bool bg) { return "\033[1m"; } +const char *Process::OutputReverse() { + return "\033[7m"; +} + const char *Process::ResetColor() { return "\033[0m"; } diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index c3885e1..399f686 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -275,9 +275,9 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { kern_return_t ret = task_set_exception_ports(self, mask, - NULL, + MACH_PORT_NULL, EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, - NULL); + THREAD_STATE_NONE); (void)ret; } #endif diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index 913b073..9a388b4 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -215,6 +215,38 @@ const char *Process::OutputColor(char code, bool bold, bool bg) { return 0; } +static WORD GetConsoleTextAttribute(HANDLE hConsoleOutput) { + CONSOLE_SCREEN_BUFFER_INFO info; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &info); + return info.wAttributes; +} + +const char *Process::OutputReverse() { + const WORD attributes + = GetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE)); + + const WORD foreground_mask = FOREGROUND_BLUE | FOREGROUND_GREEN | + FOREGROUND_RED | FOREGROUND_INTENSITY; + const WORD background_mask = BACKGROUND_BLUE | BACKGROUND_GREEN | + BACKGROUND_RED | BACKGROUND_INTENSITY; + const WORD color_mask = foreground_mask | background_mask; + + WORD new_attributes = + ((attributes & FOREGROUND_BLUE )?BACKGROUND_BLUE :0) | + ((attributes & FOREGROUND_GREEN )?BACKGROUND_GREEN :0) | + ((attributes & FOREGROUND_RED )?BACKGROUND_RED :0) | + ((attributes & FOREGROUND_INTENSITY)?BACKGROUND_INTENSITY:0) | + ((attributes & BACKGROUND_BLUE )?FOREGROUND_BLUE :0) | + ((attributes & BACKGROUND_GREEN )?FOREGROUND_GREEN :0) | + ((attributes & BACKGROUND_RED )?FOREGROUND_RED :0) | + ((attributes & BACKGROUND_INTENSITY)?FOREGROUND_INTENSITY:0) | + 0; + new_attributes = (attributes & ~color_mask) | (new_attributes & color_mask); + + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), new_attributes); + return 0; +} + const char *Process::ResetColor() { SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors()); return 0; diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp new file mode 100644 index 0000000..330519f --- /dev/null +++ b/lib/Support/YAMLParser.cpp @@ -0,0 +1,2117 @@ +//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a YAML parser. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/YAMLParser.h" + +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SourceMgr.h" + +using namespace llvm; +using namespace yaml; + +enum UnicodeEncodingForm { + UEF_UTF32_LE, //< UTF-32 Little Endian + UEF_UTF32_BE, //< UTF-32 Big Endian + UEF_UTF16_LE, //< UTF-16 Little Endian + UEF_UTF16_BE, //< UTF-16 Big Endian + UEF_UTF8, //< UTF-8 or ascii. + UEF_Unknown //< Not a valid Unicode encoding. +}; + +/// EncodingInfo - Holds the encoding type and length of the byte order mark if +/// it exists. Length is in {0, 2, 3, 4}. +typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo; + +/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode +/// encoding form of \a Input. +/// +/// @param Input A string of length 0 or more. +/// @returns An EncodingInfo indicating the Unicode encoding form of the input +/// and how long the byte order mark is if one exists. +static EncodingInfo getUnicodeEncoding(StringRef Input) { + if (Input.size() == 0) + return std::make_pair(UEF_Unknown, 0); + + switch (uint8_t(Input[0])) { + case 0x00: + if (Input.size() >= 4) { + if ( Input[1] == 0 + && uint8_t(Input[2]) == 0xFE + && uint8_t(Input[3]) == 0xFF) + return std::make_pair(UEF_UTF32_BE, 4); + if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) + return std::make_pair(UEF_UTF32_BE, 0); + } + + if (Input.size() >= 2 && Input[1] != 0) + return std::make_pair(UEF_UTF16_BE, 0); + return std::make_pair(UEF_Unknown, 0); + case 0xFF: + if ( Input.size() >= 4 + && uint8_t(Input[1]) == 0xFE + && Input[2] == 0 + && Input[3] == 0) + return std::make_pair(UEF_UTF32_LE, 4); + + if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) + return std::make_pair(UEF_UTF16_LE, 2); + return std::make_pair(UEF_Unknown, 0); + case 0xFE: + if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) + return std::make_pair(UEF_UTF16_BE, 2); + return std::make_pair(UEF_Unknown, 0); + case 0xEF: + if ( Input.size() >= 3 + && uint8_t(Input[1]) == 0xBB + && uint8_t(Input[2]) == 0xBF) + return std::make_pair(UEF_UTF8, 3); + return std::make_pair(UEF_Unknown, 0); + } + + // It could still be utf-32 or utf-16. + if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) + return std::make_pair(UEF_UTF32_LE, 0); + + if (Input.size() >= 2 && Input[1] == 0) + return std::make_pair(UEF_UTF16_LE, 0); + + return std::make_pair(UEF_UTF8, 0); +} + +namespace llvm { +namespace yaml { +/// Token - A single YAML token. +struct Token : ilist_node<Token> { + enum TokenKind { + TK_Error, // Uninitialized token. + TK_StreamStart, + TK_StreamEnd, + TK_VersionDirective, + TK_TagDirective, + TK_DocumentStart, + TK_DocumentEnd, + TK_BlockEntry, + TK_BlockEnd, + TK_BlockSequenceStart, + TK_BlockMappingStart, + TK_FlowEntry, + TK_FlowSequenceStart, + TK_FlowSequenceEnd, + TK_FlowMappingStart, + TK_FlowMappingEnd, + TK_Key, + TK_Value, + TK_Scalar, + TK_Alias, + TK_Anchor, + TK_Tag + } Kind; + + /// A string of length 0 or more whose begin() points to the logical location + /// of the token in the input. + StringRef Range; + + Token() : Kind(TK_Error) {} +}; +} +} + +namespace llvm { +template<> +struct ilist_sentinel_traits<Token> { + Token *createSentinel() const { + return &Sentinel; + } + static void destroySentinel(Token*) {} + + Token *provideInitialHead() const { return createSentinel(); } + Token *ensureHead(Token*) const { return createSentinel(); } + static void noteHead(Token*, Token*) {} + +private: + mutable Token Sentinel; +}; + +template<> +struct ilist_node_traits<Token> { + Token *createNode(const Token &V) { + return new (Alloc.Allocate<Token>()) Token(V); + } + static void deleteNode(Token *V) {} + + void addNodeToList(Token *) {} + void removeNodeFromList(Token *) {} + void transferNodesFromList(ilist_node_traits & /*SrcTraits*/, + ilist_iterator<Token> /*first*/, + ilist_iterator<Token> /*last*/) {} + + BumpPtrAllocator Alloc; +}; +} + +typedef ilist<Token> TokenQueueT; + +namespace { +/// @brief This struct is used to track simple keys. +/// +/// Simple keys are handled by creating an entry in SimpleKeys for each Token +/// which could legally be the start of a simple key. When peekNext is called, +/// if the Token To be returned is referenced by a SimpleKey, we continue +/// tokenizing until that potential simple key has either been found to not be +/// a simple key (we moved on to the next line or went further than 1024 chars). +/// Or when we run into a Value, and then insert a Key token (and possibly +/// others) before the SimpleKey's Tok. +struct SimpleKey { + TokenQueueT::iterator Tok; + unsigned Column; + unsigned Line; + unsigned FlowLevel; + bool IsRequired; + + bool operator ==(const SimpleKey &Other) { + return Tok == Other.Tok; + } +}; +} + +/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit +/// subsequence and the subsequence's length in code units (uint8_t). +/// A length of 0 represents an error. +typedef std::pair<uint32_t, unsigned> UTF8Decoded; + +static UTF8Decoded decodeUTF8(StringRef Range) { + StringRef::iterator Position= Range.begin(); + StringRef::iterator End = Range.end(); + // 1 byte: [0x00, 0x7f] + // Bit pattern: 0xxxxxxx + if ((*Position & 0x80) == 0) { + return std::make_pair(*Position, 1); + } + // 2 bytes: [0x80, 0x7ff] + // Bit pattern: 110xxxxx 10xxxxxx + if (Position + 1 != End && + ((*Position & 0xE0) == 0xC0) && + ((*(Position + 1) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x1F) << 6) | + (*(Position + 1) & 0x3F); + if (codepoint >= 0x80) + return std::make_pair(codepoint, 2); + } + // 3 bytes: [0x8000, 0xffff] + // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx + if (Position + 2 != End && + ((*Position & 0xF0) == 0xE0) && + ((*(Position + 1) & 0xC0) == 0x80) && + ((*(Position + 2) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x0F) << 12) | + ((*(Position + 1) & 0x3F) << 6) | + (*(Position + 2) & 0x3F); + // Codepoints between 0xD800 and 0xDFFF are invalid, as + // they are high / low surrogate halves used by UTF-16. + if (codepoint >= 0x800 && + (codepoint < 0xD800 || codepoint > 0xDFFF)) + return std::make_pair(codepoint, 3); + } + // 4 bytes: [0x10000, 0x10FFFF] + // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + if (Position + 3 != End && + ((*Position & 0xF8) == 0xF0) && + ((*(Position + 1) & 0xC0) == 0x80) && + ((*(Position + 2) & 0xC0) == 0x80) && + ((*(Position + 3) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x07) << 18) | + ((*(Position + 1) & 0x3F) << 12) | + ((*(Position + 2) & 0x3F) << 6) | + (*(Position + 3) & 0x3F); + if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) + return std::make_pair(codepoint, 4); + } + return std::make_pair(0, 0); +} + +namespace llvm { +namespace yaml { +/// @brief Scans YAML tokens from a MemoryBuffer. +class Scanner { +public: + Scanner(const StringRef Input, SourceMgr &SM); + + /// @brief Parse the next token and return it without popping it. + Token &peekNext(); + + /// @brief Parse the next token and pop it from the queue. + Token getNext(); + + void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + SM.PrintMessage(Loc, Kind, Message, Ranges); + } + + void setError(const Twine &Message, StringRef::iterator Position) { + if (Current >= End) + Current = End - 1; + + // Don't print out more errors after the first one we encounter. The rest + // are just the result of the first, and have no meaning. + if (!Failed) + printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); + Failed = true; + } + + void setError(const Twine &Message) { + setError(Message, Current); + } + + /// @brief Returns true if an error occurred while parsing. + bool failed() { + return Failed; + } + +private: + StringRef currentInput() { + return StringRef(Current, End - Current); + } + + /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting + /// at \a Position. + /// + /// If the UTF-8 code units starting at Position do not form a well-formed + /// code unit subsequence, then the Unicode scalar value is 0, and the length + /// is 0. + UTF8Decoded decodeUTF8(StringRef::iterator Position) { + return ::decodeUTF8(StringRef(Position, End - Position)); + } + + // The following functions are based on the gramar rules in the YAML spec. The + // style of the function names it meant to closely match how they are written + // in the spec. The number within the [] is the number of the grammar rule in + // the spec. + // + // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. + // + // c- + // A production starting and ending with a special character. + // b- + // A production matching a single line break. + // nb- + // A production starting and ending with a non-break character. + // s- + // A production starting and ending with a white space character. + // ns- + // A production starting and ending with a non-space character. + // l- + // A production matching complete line(s). + + /// @brief Skip a single nb-char[27] starting at Position. + /// + /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] + /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] + /// + /// @returns The code unit after the nb-char, or Position if it's not an + /// nb-char. + StringRef::iterator skip_nb_char(StringRef::iterator Position); + + /// @brief Skip a single b-break[28] starting at Position. + /// + /// A b-break is 0xD 0xA | 0xD | 0xA + /// + /// @returns The code unit after the b-break, or Position if it's not a + /// b-break. + StringRef::iterator skip_b_break(StringRef::iterator Position); + + /// @brief Skip a single s-white[33] starting at Position. + /// + /// A s-white is 0x20 | 0x9 + /// + /// @returns The code unit after the s-white, or Position if it's not a + /// s-white. + StringRef::iterator skip_s_white(StringRef::iterator Position); + + /// @brief Skip a single ns-char[34] starting at Position. + /// + /// A ns-char is nb-char - s-white + /// + /// @returns The code unit after the ns-char, or Position if it's not a + /// ns-char. + StringRef::iterator skip_ns_char(StringRef::iterator Position); + + typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator); + /// @brief Skip minimal well-formed code unit subsequences until Func + /// returns its input. + /// + /// @returns The code unit after the last minimal well-formed code unit + /// subsequence that Func accepted. + StringRef::iterator skip_while( SkipWhileFunc Func + , StringRef::iterator Position); + + /// @brief Scan ns-uri-char[39]s starting at Cur. + /// + /// This updates Cur and Column while scanning. + /// + /// @returns A StringRef starting at Cur which covers the longest contiguous + /// sequence of ns-uri-char. + StringRef scan_ns_uri_char(); + + /// @brief Scan ns-plain-one-line[133] starting at \a Cur. + StringRef scan_ns_plain_one_line(); + + /// @brief Consume a minimal well-formed code unit subsequence starting at + /// \a Cur. Return false if it is not the same Unicode scalar value as + /// \a Expected. This updates \a Column. + bool consume(uint32_t Expected); + + /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. + void skip(uint32_t Distance); + + /// @brief Return true if the minimal well-formed code unit subsequence at + /// Pos is whitespace or a new line + bool isBlankOrBreak(StringRef::iterator Position); + + /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey. + void saveSimpleKeyCandidate( TokenQueueT::iterator Tok + , unsigned AtColumn + , bool IsRequired); + + /// @brief Remove simple keys that can no longer be valid simple keys. + /// + /// Invalid simple keys are not on the current line or are further than 1024 + /// columns back. + void removeStaleSimpleKeyCandidates(); + + /// @brief Remove all simple keys on FlowLevel \a Level. + void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); + + /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd + /// tokens if needed. + bool unrollIndent(int ToColumn); + + /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint + /// if needed. + bool rollIndent( int ToColumn + , Token::TokenKind Kind + , TokenQueueT::iterator InsertPoint); + + /// @brief Skip whitespace and comments until the start of the next token. + void scanToNextToken(); + + /// @brief Must be the first token generated. + bool scanStreamStart(); + + /// @brief Generate tokens needed to close out the stream. + bool scanStreamEnd(); + + /// @brief Scan a %BLAH directive. + bool scanDirective(); + + /// @brief Scan a ... or ---. + bool scanDocumentIndicator(bool IsStart); + + /// @brief Scan a [ or { and generate the proper flow collection start token. + bool scanFlowCollectionStart(bool IsSequence); + + /// @brief Scan a ] or } and generate the proper flow collection end token. + bool scanFlowCollectionEnd(bool IsSequence); + + /// @brief Scan the , that separates entries in a flow collection. + bool scanFlowEntry(); + + /// @brief Scan the - that starts block sequence entries. + bool scanBlockEntry(); + + /// @brief Scan an explicit ? indicating a key. + bool scanKey(); + + /// @brief Scan an explicit : indicating a value. + bool scanValue(); + + /// @brief Scan a quoted scalar. + bool scanFlowScalar(bool IsDoubleQuoted); + + /// @brief Scan an unquoted scalar. + bool scanPlainScalar(); + + /// @brief Scan an Alias or Anchor starting with * or &. + bool scanAliasOrAnchor(bool IsAlias); + + /// @brief Scan a block scalar starting with | or >. + bool scanBlockScalar(bool IsLiteral); + + /// @brief Scan a tag of the form !stuff. + bool scanTag(); + + /// @brief Dispatch to the next scanning function based on \a *Cur. + bool fetchMoreTokens(); + + /// @brief The SourceMgr used for diagnostics and buffer management. + SourceMgr &SM; + + /// @brief The original input. + MemoryBuffer *InputBuffer; + + /// @brief The current position of the scanner. + StringRef::iterator Current; + + /// @brief The end of the input (one past the last character). + StringRef::iterator End; + + /// @brief Current YAML indentation level in spaces. + int Indent; + + /// @brief Current column number in Unicode code points. + unsigned Column; + + /// @brief Current line number. + unsigned Line; + + /// @brief How deep we are in flow style containers. 0 Means at block level. + unsigned FlowLevel; + + /// @brief Are we at the start of the stream? + bool IsStartOfStream; + + /// @brief Can the next token be the start of a simple key? + bool IsSimpleKeyAllowed; + + /// @brief Is the next token required to start a simple key? + bool IsSimpleKeyRequired; + + /// @brief True if an error has occurred. + bool Failed; + + /// @brief Queue of tokens. This is required to queue up tokens while looking + /// for the end of a simple key. And for cases where a single character + /// can produce multiple tokens (e.g. BlockEnd). + TokenQueueT TokenQueue; + + /// @brief Indentation levels. + SmallVector<int, 4> Indents; + + /// @brief Potential simple keys. + SmallVector<SimpleKey, 4> SimpleKeys; +}; + +} // end namespace yaml +} // end namespace llvm + +/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. +static void encodeUTF8( uint32_t UnicodeScalarValue + , SmallVectorImpl<char> &Result) { + if (UnicodeScalarValue <= 0x7F) { + Result.push_back(UnicodeScalarValue & 0x7F); + } else if (UnicodeScalarValue <= 0x7FF) { + uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); + uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + } else if (UnicodeScalarValue <= 0xFFFF) { + uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); + uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); + uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + Result.push_back(ThirdByte); + } else if (UnicodeScalarValue <= 0x10FFFF) { + uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); + uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); + uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); + uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + Result.push_back(ThirdByte); + Result.push_back(FourthByte); + } +} + +bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { + SourceMgr SM; + Scanner scanner(Input, SM); + while (true) { + Token T = scanner.getNext(); + switch (T.Kind) { + case Token::TK_StreamStart: + OS << "Stream-Start: "; + break; + case Token::TK_StreamEnd: + OS << "Stream-End: "; + break; + case Token::TK_VersionDirective: + OS << "Version-Directive: "; + break; + case Token::TK_TagDirective: + OS << "Tag-Directive: "; + break; + case Token::TK_DocumentStart: + OS << "Document-Start: "; + break; + case Token::TK_DocumentEnd: + OS << "Document-End: "; + break; + case Token::TK_BlockEntry: + OS << "Block-Entry: "; + break; + case Token::TK_BlockEnd: + OS << "Block-End: "; + break; + case Token::TK_BlockSequenceStart: + OS << "Block-Sequence-Start: "; + break; + case Token::TK_BlockMappingStart: + OS << "Block-Mapping-Start: "; + break; + case Token::TK_FlowEntry: + OS << "Flow-Entry: "; + break; + case Token::TK_FlowSequenceStart: + OS << "Flow-Sequence-Start: "; + break; + case Token::TK_FlowSequenceEnd: + OS << "Flow-Sequence-End: "; + break; + case Token::TK_FlowMappingStart: + OS << "Flow-Mapping-Start: "; + break; + case Token::TK_FlowMappingEnd: + OS << "Flow-Mapping-End: "; + break; + case Token::TK_Key: + OS << "Key: "; + break; + case Token::TK_Value: + OS << "Value: "; + break; + case Token::TK_Scalar: + OS << "Scalar: "; + break; + case Token::TK_Alias: + OS << "Alias: "; + break; + case Token::TK_Anchor: + OS << "Anchor: "; + break; + case Token::TK_Tag: + OS << "Tag: "; + break; + case Token::TK_Error: + break; + } + OS << T.Range << "\n"; + if (T.Kind == Token::TK_StreamEnd) + break; + else if (T.Kind == Token::TK_Error) + return false; + } + return true; +} + +bool yaml::scanTokens(StringRef Input) { + llvm::SourceMgr SM; + llvm::yaml::Scanner scanner(Input, SM); + for (;;) { + llvm::yaml::Token T = scanner.getNext(); + if (T.Kind == Token::TK_StreamEnd) + break; + else if (T.Kind == Token::TK_Error) + return false; + } + return true; +} + +std::string yaml::escape(StringRef Input) { + std::string EscapedInput; + for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { + if (*i == '\\') + EscapedInput += "\\\\"; + else if (*i == '"') + EscapedInput += "\\\""; + else if (*i == 0) + EscapedInput += "\\0"; + else if (*i == 0x07) + EscapedInput += "\\a"; + else if (*i == 0x08) + EscapedInput += "\\b"; + else if (*i == 0x09) + EscapedInput += "\\t"; + else if (*i == 0x0A) + EscapedInput += "\\n"; + else if (*i == 0x0B) + EscapedInput += "\\v"; + else if (*i == 0x0C) + EscapedInput += "\\f"; + else if (*i == 0x0D) + EscapedInput += "\\r"; + else if (*i == 0x1B) + EscapedInput += "\\e"; + else if (*i >= 0 && *i < 0x20) { // Control characters not handled above. + std::string HexStr = utohexstr(*i); + EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; + } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. + UTF8Decoded UnicodeScalarValue + = decodeUTF8(StringRef(i, Input.end() - i)); + if (UnicodeScalarValue.second == 0) { + // Found invalid char. + SmallString<4> Val; + encodeUTF8(0xFFFD, Val); + EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); + // FIXME: Error reporting. + return EscapedInput; + } + if (UnicodeScalarValue.first == 0x85) + EscapedInput += "\\N"; + else if (UnicodeScalarValue.first == 0xA0) + EscapedInput += "\\_"; + else if (UnicodeScalarValue.first == 0x2028) + EscapedInput += "\\L"; + else if (UnicodeScalarValue.first == 0x2029) + EscapedInput += "\\P"; + else { + std::string HexStr = utohexstr(UnicodeScalarValue.first); + if (HexStr.size() <= 2) + EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; + else if (HexStr.size() <= 4) + EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; + else if (HexStr.size() <= 8) + EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; + } + i += UnicodeScalarValue.second - 1; + } else + EscapedInput.push_back(*i); + } + return EscapedInput; +} + +Scanner::Scanner(StringRef Input, SourceMgr &sm) + : SM(sm) + , Indent(-1) + , Column(0) + , Line(0) + , FlowLevel(0) + , IsStartOfStream(true) + , IsSimpleKeyAllowed(true) + , IsSimpleKeyRequired(false) + , Failed(false) { + InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); + SM.AddNewSourceBuffer(InputBuffer, SMLoc()); + Current = InputBuffer->getBufferStart(); + End = InputBuffer->getBufferEnd(); +} + +Token &Scanner::peekNext() { + // If the current token is a possible simple key, keep parsing until we + // can confirm. + bool NeedMore = false; + while (true) { + if (TokenQueue.empty() || NeedMore) { + if (!fetchMoreTokens()) { + TokenQueue.clear(); + TokenQueue.push_back(Token()); + return TokenQueue.front(); + } + } + assert(!TokenQueue.empty() && + "fetchMoreTokens lied about getting tokens!"); + + removeStaleSimpleKeyCandidates(); + SimpleKey SK; + SK.Tok = TokenQueue.front(); + if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK) + == SimpleKeys.end()) + break; + else + NeedMore = true; + } + return TokenQueue.front(); +} + +Token Scanner::getNext() { + Token Ret = peekNext(); + // TokenQueue can be empty if there was an error getting the next token. + if (!TokenQueue.empty()) + TokenQueue.pop_front(); + + // There cannot be any referenced Token's if the TokenQueue is empty. So do a + // quick deallocation of them all. + if (TokenQueue.empty()) { + TokenQueue.Alloc.Reset(); + } + + return Ret; +} + +StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { + // Check 7 bit c-printable - b-char. + if ( *Position == 0x09 + || (*Position >= 0x20 && *Position <= 0x7E)) + return Position + 1; + + // Check for valid UTF-8. + if (uint8_t(*Position) & 0x80) { + UTF8Decoded u8d = decodeUTF8(Position); + if ( u8d.second != 0 + && u8d.first != 0xFEFF + && ( u8d.first == 0x85 + || ( u8d.first >= 0xA0 + && u8d.first <= 0xD7FF) + || ( u8d.first >= 0xE000 + && u8d.first <= 0xFFFD) + || ( u8d.first >= 0x10000 + && u8d.first <= 0x10FFFF))) + return Position + u8d.second; + } + return Position; +} + +StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { + if (*Position == 0x0D) { + if (Position + 1 != End && *(Position + 1) == 0x0A) + return Position + 2; + return Position + 1; + } + + if (*Position == 0x0A) + return Position + 1; + return Position; +} + + +StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { + if (Position == End) + return Position; + if (*Position == ' ' || *Position == '\t') + return Position + 1; + return Position; +} + +StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { + if (Position == End) + return Position; + if (*Position == ' ' || *Position == '\t') + return Position; + return skip_nb_char(Position); +} + +StringRef::iterator Scanner::skip_while( SkipWhileFunc Func + , StringRef::iterator Position) { + while (true) { + StringRef::iterator i = (this->*Func)(Position); + if (i == Position) + break; + Position = i; + } + return Position; +} + +static bool is_ns_hex_digit(const char C) { + return (C >= '0' && C <= '9') + || (C >= 'a' && C <= 'z') + || (C >= 'A' && C <= 'Z'); +} + +static bool is_ns_word_char(const char C) { + return C == '-' + || (C >= 'a' && C <= 'z') + || (C >= 'A' && C <= 'Z'); +} + +StringRef Scanner::scan_ns_uri_char() { + StringRef::iterator Start = Current; + while (true) { + if (Current == End) + break; + if (( *Current == '%' + && Current + 2 < End + && is_ns_hex_digit(*(Current + 1)) + && is_ns_hex_digit(*(Current + 2))) + || is_ns_word_char(*Current) + || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") + != StringRef::npos) { + ++Current; + ++Column; + } else + break; + } + return StringRef(Start, Current - Start); +} + +StringRef Scanner::scan_ns_plain_one_line() { + StringRef::iterator start = Current; + // The first character must already be verified. + ++Current; + while (true) { + if (Current == End) { + break; + } else if (*Current == ':') { + // Check if the next character is a ns-char. + if (Current + 1 == End) + break; + StringRef::iterator i = skip_ns_char(Current + 1); + if (Current + 1 != i) { + Current = i; + Column += 2; // Consume both the ':' and ns-char. + } else + break; + } else if (*Current == '#') { + // Check if the previous character was a ns-char. + // The & 0x80 check is to check for the trailing byte of a utf-8 + if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) { + ++Current; + ++Column; + } else + break; + } else { + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + } + return StringRef(start, Current - start); +} + +bool Scanner::consume(uint32_t Expected) { + if (Expected >= 0x80) + report_fatal_error("Not dealing with this yet"); + if (Current == End) + return false; + if (uint8_t(*Current) >= 0x80) + report_fatal_error("Not dealing with this yet"); + if (uint8_t(*Current) == Expected) { + ++Current; + ++Column; + return true; + } + return false; +} + +void Scanner::skip(uint32_t Distance) { + Current += Distance; + Column += Distance; +} + +bool Scanner::isBlankOrBreak(StringRef::iterator Position) { + if (Position == End) + return false; + if ( *Position == ' ' || *Position == '\t' + || *Position == '\r' || *Position == '\n') + return true; + return false; +} + +void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok + , unsigned AtColumn + , bool IsRequired) { + if (IsSimpleKeyAllowed) { + SimpleKey SK; + SK.Tok = Tok; + SK.Line = Line; + SK.Column = AtColumn; + SK.IsRequired = IsRequired; + SK.FlowLevel = FlowLevel; + SimpleKeys.push_back(SK); + } +} + +void Scanner::removeStaleSimpleKeyCandidates() { + for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); + i != SimpleKeys.end();) { + if (i->Line != Line || i->Column + 1024 < Column) { + if (i->IsRequired) + setError( "Could not find expected : for simple key" + , i->Tok->Range.begin()); + i = SimpleKeys.erase(i); + } else + ++i; + } +} + +void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { + if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) + SimpleKeys.pop_back(); +} + +bool Scanner::unrollIndent(int ToColumn) { + Token T; + // Indentation is ignored in flow. + if (FlowLevel != 0) + return true; + + while (Indent > ToColumn) { + T.Kind = Token::TK_BlockEnd; + T.Range = StringRef(Current, 1); + TokenQueue.push_back(T); + Indent = Indents.pop_back_val(); + } + + return true; +} + +bool Scanner::rollIndent( int ToColumn + , Token::TokenKind Kind + , TokenQueueT::iterator InsertPoint) { + if (FlowLevel) + return true; + if (Indent < ToColumn) { + Indents.push_back(Indent); + Indent = ToColumn; + + Token T; + T.Kind = Kind; + T.Range = StringRef(Current, 0); + TokenQueue.insert(InsertPoint, T); + } + return true; +} + +void Scanner::scanToNextToken() { + while (true) { + while (*Current == ' ' || *Current == '\t') { + skip(1); + } + + // Skip comment. + if (*Current == '#') { + while (true) { + // This may skip more than one byte, thus Column is only incremented + // for code points. + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + } + + // Skip EOL. + StringRef::iterator i = skip_b_break(Current); + if (i == Current) + break; + Current = i; + ++Line; + Column = 0; + // New lines may start a simple key. + if (!FlowLevel) + IsSimpleKeyAllowed = true; + } +} + +bool Scanner::scanStreamStart() { + IsStartOfStream = false; + + EncodingInfo EI = getUnicodeEncoding(currentInput()); + + Token T; + T.Kind = Token::TK_StreamStart; + T.Range = StringRef(Current, EI.second); + TokenQueue.push_back(T); + Current += EI.second; + return true; +} + +bool Scanner::scanStreamEnd() { + // Force an ending new line if one isn't present. + if (Column != 0) { + Column = 0; + ++Line; + } + + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + Token T; + T.Kind = Token::TK_StreamEnd; + T.Range = StringRef(Current, 0); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanDirective() { + // Reset the indentation level. + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + StringRef::iterator Start = Current; + consume('%'); + StringRef::iterator NameStart = Current; + Current = skip_while(&Scanner::skip_ns_char, Current); + StringRef Name(NameStart, Current - NameStart); + Current = skip_while(&Scanner::skip_s_white, Current); + + if (Name == "YAML") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Token T; + T.Kind = Token::TK_VersionDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; + } + return false; +} + +bool Scanner::scanDocumentIndicator(bool IsStart) { + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + Token T; + T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; + T.Range = StringRef(Current, 3); + skip(3); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanFlowCollectionStart(bool IsSequence) { + Token T; + T.Kind = IsSequence ? Token::TK_FlowSequenceStart + : Token::TK_FlowMappingStart; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + + // [ and { may begin a simple key. + saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false); + + // And may also be followed by a simple key. + IsSimpleKeyAllowed = true; + ++FlowLevel; + return true; +} + +bool Scanner::scanFlowCollectionEnd(bool IsSequence) { + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = false; + Token T; + T.Kind = IsSequence ? Token::TK_FlowSequenceEnd + : Token::TK_FlowMappingEnd; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + if (FlowLevel) + --FlowLevel; + return true; +} + +bool Scanner::scanFlowEntry() { + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = true; + Token T; + T.Kind = Token::TK_FlowEntry; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanBlockEntry() { + rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = true; + Token T; + T.Kind = Token::TK_BlockEntry; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanKey() { + if (!FlowLevel) + rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = !FlowLevel; + + Token T; + T.Kind = Token::TK_Key; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanValue() { + // If the previous token could have been a simple key, insert the key token + // into the token queue. + if (!SimpleKeys.empty()) { + SimpleKey SK = SimpleKeys.pop_back_val(); + Token T; + T.Kind = Token::TK_Key; + T.Range = SK.Tok->Range; + TokenQueueT::iterator i, e; + for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { + if (i == SK.Tok) + break; + } + assert(i != e && "SimpleKey not in token queue!"); + i = TokenQueue.insert(i, T); + + // We may also need to add a Block-Mapping-Start token. + rollIndent(SK.Column, Token::TK_BlockMappingStart, i); + + IsSimpleKeyAllowed = false; + } else { + if (!FlowLevel) + rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + IsSimpleKeyAllowed = !FlowLevel; + } + + Token T; + T.Kind = Token::TK_Value; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +// Forbidding inlining improves performance by roughly 20%. +// FIXME: Remove once llvm optimizes this to the faster version without hints. +LLVM_ATTRIBUTE_NOINLINE static bool +wasEscaped(StringRef::iterator First, StringRef::iterator Position); + +// Returns whether a character at 'Position' was escaped with a leading '\'. +// 'First' specifies the position of the first character in the string. +static bool wasEscaped(StringRef::iterator First, + StringRef::iterator Position) { + assert(Position - 1 >= First); + StringRef::iterator I = Position - 1; + // We calculate the number of consecutive '\'s before the current position + // by iterating backwards through our string. + while (I >= First && *I == '\\') --I; + // (Position - 1 - I) now contains the number of '\'s before the current + // position. If it is odd, the character at 'Position' was escaped. + return (Position - 1 - I) % 2 == 1; +} + +bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + if (IsDoubleQuoted) { + do { + ++Current; + while (Current != End && *Current != '"') + ++Current; + // Repeat until the previous character was not a '\' or was an escaped + // backslash. + } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current)); + } else { + skip(1); + while (true) { + // Skip a ' followed by another '. + if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { + skip(2); + continue; + } else if (*Current == '\'') + break; + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) { + i = skip_b_break(Current); + if (i == Current) + break; + Current = i; + Column = 0; + ++Line; + } else { + if (i == End) + break; + Current = i; + ++Column; + } + } + } + skip(1); // Skip ending quote. + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanPlainScalar() { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + unsigned LeadingBlanks = 0; + assert(Indent >= -1 && "Indent must be >= -1 !"); + unsigned indent = static_cast<unsigned>(Indent + 1); + while (true) { + if (*Current == '#') + break; + + while (!isBlankOrBreak(Current)) { + if ( FlowLevel && *Current == ':' + && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { + setError("Found unexpected ':' while scanning a plain scalar", Current); + return false; + } + + // Check for the end of the plain scalar. + if ( (*Current == ':' && isBlankOrBreak(Current + 1)) + || ( FlowLevel + && (StringRef(Current, 1).find_first_of(",:?[]{}") + != StringRef::npos))) + break; + + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + + // Are we at the end? + if (!isBlankOrBreak(Current)) + break; + + // Eat blanks. + StringRef::iterator Tmp = Current; + while (isBlankOrBreak(Tmp)) { + StringRef::iterator i = skip_s_white(Tmp); + if (i != Tmp) { + if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { + setError("Found invalid tab character in indentation", Tmp); + return false; + } + Tmp = i; + ++Column; + } else { + i = skip_b_break(Tmp); + if (!LeadingBlanks) + LeadingBlanks = 1; + Tmp = i; + Column = 0; + ++Line; + } + } + + if (!FlowLevel && Column < indent) + break; + + Current = Tmp; + } + if (Start == Current) { + setError("Got empty plain scalar", Start); + return false; + } + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Plain scalars can be simple keys. + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanAliasOrAnchor(bool IsAlias) { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + skip(1); + while(true) { + if ( *Current == '[' || *Current == ']' + || *Current == '{' || *Current == '}' + || *Current == ',' + || *Current == ':') + break; + StringRef::iterator i = skip_ns_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + + if (Start == Current) { + setError("Got empty alias or anchor", Start); + return false; + } + + Token T; + T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Alias and anchors can be simple keys. + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanBlockScalar(bool IsLiteral) { + StringRef::iterator Start = Current; + skip(1); // Eat | or > + while(true) { + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) { + if (Column == 0) + break; + i = skip_b_break(Current); + if (i != Current) { + // We got a line break. + Column = 0; + ++Line; + Current = i; + continue; + } else { + // There was an error, which should already have been printed out. + return false; + } + } + Current = i; + ++Column; + } + + if (Start == Current) { + setError("Got empty block scalar", Start); + return false; + } + + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanTag() { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + skip(1); // Eat !. + if (Current == End || isBlankOrBreak(Current)); // An empty tag. + else if (*Current == '<') { + skip(1); + scan_ns_uri_char(); + if (!consume('>')) + return false; + } else { + // FIXME: Actually parse the c-ns-shorthand-tag rule. + Current = skip_while(&Scanner::skip_ns_char, Current); + } + + Token T; + T.Kind = Token::TK_Tag; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Tags can be simple keys. + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::fetchMoreTokens() { + if (IsStartOfStream) + return scanStreamStart(); + + scanToNextToken(); + + if (Current == End) + return scanStreamEnd(); + + removeStaleSimpleKeyCandidates(); + + unrollIndent(Column); + + if (Column == 0 && *Current == '%') + return scanDirective(); + + if (Column == 0 && Current + 4 <= End + && *Current == '-' + && *(Current + 1) == '-' + && *(Current + 2) == '-' + && (Current + 3 == End || isBlankOrBreak(Current + 3))) + return scanDocumentIndicator(true); + + if (Column == 0 && Current + 4 <= End + && *Current == '.' + && *(Current + 1) == '.' + && *(Current + 2) == '.' + && (Current + 3 == End || isBlankOrBreak(Current + 3))) + return scanDocumentIndicator(false); + + if (*Current == '[') + return scanFlowCollectionStart(true); + + if (*Current == '{') + return scanFlowCollectionStart(false); + + if (*Current == ']') + return scanFlowCollectionEnd(true); + + if (*Current == '}') + return scanFlowCollectionEnd(false); + + if (*Current == ',') + return scanFlowEntry(); + + if (*Current == '-' && isBlankOrBreak(Current + 1)) + return scanBlockEntry(); + + if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) + return scanKey(); + + if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) + return scanValue(); + + if (*Current == '*') + return scanAliasOrAnchor(true); + + if (*Current == '&') + return scanAliasOrAnchor(false); + + if (*Current == '!') + return scanTag(); + + if (*Current == '|' && !FlowLevel) + return scanBlockScalar(true); + + if (*Current == '>' && !FlowLevel) + return scanBlockScalar(false); + + if (*Current == '\'') + return scanFlowScalar(false); + + if (*Current == '"') + return scanFlowScalar(true); + + // Get a plain scalar. + StringRef FirstChar(Current, 1); + if (!(isBlankOrBreak(Current) + || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) + || (*Current == '-' && !isBlankOrBreak(Current + 1)) + || (!FlowLevel && (*Current == '?' || *Current == ':') + && isBlankOrBreak(Current + 1)) + || (!FlowLevel && *Current == ':' + && Current + 2 < End + && *(Current + 1) == ':' + && !isBlankOrBreak(Current + 2))) + return scanPlainScalar(); + + setError("Unrecognized character while tokenizing."); + return false; +} + +Stream::Stream(StringRef Input, SourceMgr &SM) + : scanner(new Scanner(Input, SM)) + , CurrentDoc(0) {} + +Stream::~Stream() {} + +bool Stream::failed() { return scanner->failed(); } + +void Stream::printError(Node *N, const Twine &Msg) { + SmallVector<SMRange, 1> Ranges; + Ranges.push_back(N->getSourceRange()); + scanner->printError( N->getSourceRange().Start + , SourceMgr::DK_Error + , Msg + , Ranges); +} + +void Stream::handleYAMLDirective(const Token &t) { + // TODO: Ensure version is 1.x. +} + +document_iterator Stream::begin() { + if (CurrentDoc) + report_fatal_error("Can only iterate over the stream once"); + + // Skip Stream-Start. + scanner->getNext(); + + CurrentDoc.reset(new Document(*this)); + return document_iterator(CurrentDoc); +} + +document_iterator Stream::end() { + return document_iterator(); +} + +void Stream::skip() { + for (document_iterator i = begin(), e = end(); i != e; ++i) + i->skip(); +} + +Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A) + : Doc(D) + , TypeID(Type) + , Anchor(A) { + SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); + SourceRange = SMRange(Start, Start); +} + +Token &Node::peekNext() { + return Doc->peekNext(); +} + +Token Node::getNext() { + return Doc->getNext(); +} + +Node *Node::parseBlockNode() { + return Doc->parseBlockNode(); +} + +BumpPtrAllocator &Node::getAllocator() { + return Doc->NodeAllocator; +} + +void Node::setError(const Twine &Msg, Token &Tok) const { + Doc->setError(Msg, Tok); +} + +bool Node::failed() const { + return Doc->failed(); +} + + + +StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { + // TODO: Handle newlines properly. We need to remove leading whitespace. + if (Value[0] == '"') { // Double quoted. + // Pull off the leading and trailing "s. + StringRef UnquotedValue = Value.substr(1, Value.size() - 2); + // Search for characters that would require unescaping the value. + StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); + if (i != StringRef::npos) + return unescapeDoubleQuoted(UnquotedValue, i, Storage); + return UnquotedValue; + } else if (Value[0] == '\'') { // Single quoted. + // Pull off the leading and trailing 's. + StringRef UnquotedValue = Value.substr(1, Value.size() - 2); + StringRef::size_type i = UnquotedValue.find('\''); + if (i != StringRef::npos) { + // We're going to need Storage. + Storage.clear(); + Storage.reserve(UnquotedValue.size()); + for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { + StringRef Valid(UnquotedValue.begin(), i); + Storage.insert(Storage.end(), Valid.begin(), Valid.end()); + Storage.push_back('\''); + UnquotedValue = UnquotedValue.substr(i + 2); + } + Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); + return StringRef(Storage.begin(), Storage.size()); + } + return UnquotedValue; + } + // Plain or block. + size_t trimtrail = Value.rfind(' '); + return Value.drop_back( + trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail); +} + +StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue + , StringRef::size_type i + , SmallVectorImpl<char> &Storage) + const { + // Use Storage to build proper value. + Storage.clear(); + Storage.reserve(UnquotedValue.size()); + for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { + // Insert all previous chars into Storage. + StringRef Valid(UnquotedValue.begin(), i); + Storage.insert(Storage.end(), Valid.begin(), Valid.end()); + // Chop off inserted chars. + UnquotedValue = UnquotedValue.substr(i); + + assert(!UnquotedValue.empty() && "Can't be empty!"); + + // Parse escape or line break. + switch (UnquotedValue[0]) { + case '\r': + case '\n': + Storage.push_back('\n'); + if ( UnquotedValue.size() > 1 + && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) + UnquotedValue = UnquotedValue.substr(1); + UnquotedValue = UnquotedValue.substr(1); + break; + default: + if (UnquotedValue.size() == 1) + // TODO: Report error. + break; + UnquotedValue = UnquotedValue.substr(1); + switch (UnquotedValue[0]) { + default: { + Token T; + T.Range = StringRef(UnquotedValue.begin(), 1); + setError("Unrecognized escape code!", T); + return ""; + } + case '\r': + case '\n': + // Remove the new line. + if ( UnquotedValue.size() > 1 + && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) + UnquotedValue = UnquotedValue.substr(1); + // If this was just a single byte newline, it will get skipped + // below. + break; + case '0': + Storage.push_back(0x00); + break; + case 'a': + Storage.push_back(0x07); + break; + case 'b': + Storage.push_back(0x08); + break; + case 't': + case 0x09: + Storage.push_back(0x09); + break; + case 'n': + Storage.push_back(0x0A); + break; + case 'v': + Storage.push_back(0x0B); + break; + case 'f': + Storage.push_back(0x0C); + break; + case 'r': + Storage.push_back(0x0D); + break; + case 'e': + Storage.push_back(0x1B); + break; + case ' ': + Storage.push_back(0x20); + break; + case '"': + Storage.push_back(0x22); + break; + case '/': + Storage.push_back(0x2F); + break; + case '\\': + Storage.push_back(0x5C); + break; + case 'N': + encodeUTF8(0x85, Storage); + break; + case '_': + encodeUTF8(0xA0, Storage); + break; + case 'L': + encodeUTF8(0x2028, Storage); + break; + case 'P': + encodeUTF8(0x2029, Storage); + break; + case 'x': { + if (UnquotedValue.size() < 3) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(2); + break; + } + case 'u': { + if (UnquotedValue.size() < 5) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(4); + break; + } + case 'U': { + if (UnquotedValue.size() < 9) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(8); + break; + } + } + UnquotedValue = UnquotedValue.substr(1); + } + } + Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); + return StringRef(Storage.begin(), Storage.size()); +} + +Node *KeyValueNode::getKey() { + if (Key) + return Key; + // Handle implicit null keys. + { + Token &t = peekNext(); + if ( t.Kind == Token::TK_BlockEnd + || t.Kind == Token::TK_Value + || t.Kind == Token::TK_Error) { + return Key = new (getAllocator()) NullNode(Doc); + } + if (t.Kind == Token::TK_Key) + getNext(); // skip TK_Key. + } + + // Handle explicit null keys. + Token &t = peekNext(); + if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { + return Key = new (getAllocator()) NullNode(Doc); + } + + // We've got a normal key. + return Key = parseBlockNode(); +} + +Node *KeyValueNode::getValue() { + if (Value) + return Value; + getKey()->skip(); + if (failed()) + return Value = new (getAllocator()) NullNode(Doc); + + // Handle implicit null values. + { + Token &t = peekNext(); + if ( t.Kind == Token::TK_BlockEnd + || t.Kind == Token::TK_FlowMappingEnd + || t.Kind == Token::TK_Key + || t.Kind == Token::TK_FlowEntry + || t.Kind == Token::TK_Error) { + return Value = new (getAllocator()) NullNode(Doc); + } + + if (t.Kind != Token::TK_Value) { + setError("Unexpected token in Key Value.", t); + return Value = new (getAllocator()) NullNode(Doc); + } + getNext(); // skip TK_Value. + } + + // Handle explicit null values. + Token &t = peekNext(); + if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { + return Value = new (getAllocator()) NullNode(Doc); + } + + // We got a normal value. + return Value = parseBlockNode(); +} + +void MappingNode::increment() { + if (failed()) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + if (CurrentEntry) { + CurrentEntry->skip(); + if (Type == MT_Inline) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + } + Token T = peekNext(); + if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { + // KeyValueNode eats the TK_Key. That way it can detect null keys. + CurrentEntry = new (getAllocator()) KeyValueNode(Doc); + } else if (Type == MT_Block) { + switch (T.Kind) { + case Token::TK_BlockEnd: + getNext(); + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError("Unexpected token. Expected Key or Block End", T); + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else { + switch (T.Kind) { + case Token::TK_FlowEntry: + // Eat the flow entry and recurse. + getNext(); + return increment(); + case Token::TK_FlowMappingEnd: + getNext(); + case Token::TK_Error: + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError( "Unexpected token. Expected Key, Flow Entry, or Flow " + "Mapping End." + , T); + IsAtEnd = true; + CurrentEntry = 0; + } + } +} + +void SequenceNode::increment() { + if (failed()) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + if (CurrentEntry) + CurrentEntry->skip(); + Token T = peekNext(); + if (SeqType == ST_Block) { + switch (T.Kind) { + case Token::TK_BlockEntry: + getNext(); + CurrentEntry = parseBlockNode(); + if (CurrentEntry == 0) { // An error occurred. + IsAtEnd = true; + CurrentEntry = 0; + } + break; + case Token::TK_BlockEnd: + getNext(); + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError( "Unexpected token. Expected Block Entry or Block End." + , T); + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else if (SeqType == ST_Indentless) { + switch (T.Kind) { + case Token::TK_BlockEntry: + getNext(); + CurrentEntry = parseBlockNode(); + if (CurrentEntry == 0) { // An error occurred. + IsAtEnd = true; + CurrentEntry = 0; + } + break; + default: + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else if (SeqType == ST_Flow) { + switch (T.Kind) { + case Token::TK_FlowEntry: + // Eat the flow entry and recurse. + getNext(); + WasPreviousTokenFlowEntry = true; + return increment(); + case Token::TK_FlowSequenceEnd: + getNext(); + case Token::TK_Error: + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + case Token::TK_StreamEnd: + case Token::TK_DocumentEnd: + case Token::TK_DocumentStart: + setError("Could not find closing ]!", T); + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + if (!WasPreviousTokenFlowEntry) { + setError("Expected , between entries!", T); + IsAtEnd = true; + CurrentEntry = 0; + break; + } + // Otherwise it must be a flow entry. + CurrentEntry = parseBlockNode(); + if (!CurrentEntry) { + IsAtEnd = true; + } + WasPreviousTokenFlowEntry = false; + break; + } + } +} + +Document::Document(Stream &S) : stream(S), Root(0) { + if (parseDirectives()) + expectToken(Token::TK_DocumentStart); + Token &T = peekNext(); + if (T.Kind == Token::TK_DocumentStart) + getNext(); +} + +bool Document::skip() { + if (stream.scanner->failed()) + return false; + if (!Root) + getRoot(); + Root->skip(); + Token &T = peekNext(); + if (T.Kind == Token::TK_StreamEnd) + return false; + if (T.Kind == Token::TK_DocumentEnd) { + getNext(); + return skip(); + } + return true; +} + +Token &Document::peekNext() { + return stream.scanner->peekNext(); +} + +Token Document::getNext() { + return stream.scanner->getNext(); +} + +void Document::setError(const Twine &Message, Token &Location) const { + stream.scanner->setError(Message, Location.Range.begin()); +} + +bool Document::failed() const { + return stream.scanner->failed(); +} + +Node *Document::parseBlockNode() { + Token T = peekNext(); + // Handle properties. + Token AnchorInfo; +parse_property: + switch (T.Kind) { + case Token::TK_Alias: + getNext(); + return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); + case Token::TK_Anchor: + if (AnchorInfo.Kind == Token::TK_Anchor) { + setError("Already encountered an anchor for this node!", T); + return 0; + } + AnchorInfo = getNext(); // Consume TK_Anchor. + T = peekNext(); + goto parse_property; + case Token::TK_Tag: + getNext(); // Skip TK_Tag. + T = peekNext(); + goto parse_property; + default: + break; + } + + switch (T.Kind) { + case Token::TK_BlockEntry: + // We got an unindented BlockEntry sequence. This is not terminated with + // a BlockEnd. + // Don't eat the TK_BlockEntry, SequenceNode needs it. + return new (NodeAllocator) SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Indentless); + case Token::TK_BlockSequenceStart: + getNext(); + return new (NodeAllocator) + SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Block); + case Token::TK_BlockMappingStart: + getNext(); + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Block); + case Token::TK_FlowSequenceStart: + getNext(); + return new (NodeAllocator) + SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Flow); + case Token::TK_FlowMappingStart: + getNext(); + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Flow); + case Token::TK_Scalar: + getNext(); + return new (NodeAllocator) + ScalarNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , T.Range); + case Token::TK_Key: + // Don't eat the TK_Key, KeyValueNode expects it. + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Inline); + case Token::TK_DocumentStart: + case Token::TK_DocumentEnd: + case Token::TK_StreamEnd: + default: + // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not + // !!null null. + return new (NodeAllocator) NullNode(stream.CurrentDoc); + case Token::TK_Error: + return 0; + } + llvm_unreachable("Control flow shouldn't reach here."); + return 0; +} + +bool Document::parseDirectives() { + bool isDirective = false; + while (true) { + Token T = peekNext(); + if (T.Kind == Token::TK_TagDirective) { + handleTagDirective(getNext()); + isDirective = true; + } else if (T.Kind == Token::TK_VersionDirective) { + stream.handleYAMLDirective(getNext()); + isDirective = true; + } else + break; + } + return isDirective; +} + +bool Document::expectToken(int TK) { + Token T = getNext(); + if (T.Kind != TK) { + setError("Unexpected token", T); + return false; + } + return true; +} + +OwningPtr<Document> document_iterator::NullDoc; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 72d3986..86cdca1 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -633,6 +633,19 @@ raw_ostream &raw_fd_ostream::resetColor() { return *this; } +raw_ostream &raw_fd_ostream::reverseColor() { + if (sys::Process::ColorNeedsFlush()) + flush(); + const char *colorcode = sys::Process::OutputReverse(); + if (colorcode) { + size_t len = strlen(colorcode); + write(colorcode, len); + // don't account colors towards output characters + pos -= len; + } + return *this; +} + bool raw_fd_ostream::is_displayed() const { return sys::Process::FileDescriptorIsDisplayed(FD); } |