diff options
author | Stephen Hines <srhines@google.com> | 2014-02-11 20:01:10 -0800 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-02-11 20:01:10 -0800 |
commit | ce9904c6ea8fd669978a8eefb854b330eb9828ff (patch) | |
tree | 2418ee2e96ea220977c8fb74959192036ab5b133 /lib/Support | |
parent | c27b10b198c1d9e9b51f2303994313ec2778edd7 (diff) | |
parent | dbb832b83351cec97b025b61c26536ef50c3181c (diff) | |
download | external_llvm-ce9904c6ea8fd669978a8eefb854b330eb9828ff.zip external_llvm-ce9904c6ea8fd669978a8eefb854b330eb9828ff.tar.gz external_llvm-ce9904c6ea8fd669978a8eefb854b330eb9828ff.tar.bz2 |
Merge remote-tracking branch 'upstream/release_34' into merge-20140211
Conflicts:
lib/Linker/LinkModules.cpp
lib/Support/Unix/Signals.inc
Change-Id: Ia54f291fa5dc828052d2412736e8495c1282aa64
Diffstat (limited to 'lib/Support')
49 files changed, 1628 insertions, 1076 deletions
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 34bc6b6..676e2d4 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3546,11 +3546,14 @@ void APFloat::toString(SmallVectorImpl<char> &Str, // Set FormatPrecision if zero. We want to do this before we // truncate trailing zeros, as those are part of the precision. if (!FormatPrecision) { - // It's an interesting question whether to use the nominal - // precision or the active precision here for denormals. - - // FormatPrecision = ceil(significandBits / lg_2(10)) - FormatPrecision = (semantics->precision * 59 + 195) / 196; + // We use enough digits so the number can be round-tripped back to an + // APFloat. The formula comes from "How to Print Floating-Point Numbers + // Accurately" by Steele and White. + // FIXME: Using a formula based purely on the precision is conservative; + // we can print fewer digits depending on the actual value being printed. + + // FormatPrecision = 2 + floor(significandBits / lg_2(10)) + FormatPrecision = 2 + semantics->precision * 59 / 196; } // Ignore trailing binary zeros. diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 3c4191b..6e7a541 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -26,6 +26,10 @@ BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold, : SlabSize(size), SizeThreshold(std::min(size, threshold)), Allocator(allocator), CurSlab(0), BytesAllocated(0) { } +BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold) + : SlabSize(size), SizeThreshold(std::min(size, threshold)), + Allocator(DefaultSlabAllocator), CurSlab(0), BytesAllocated(0) { } + BumpPtrAllocator::~BumpPtrAllocator() { DeallocateSlabs(CurSlab); } @@ -167,9 +171,6 @@ void BumpPtrAllocator::PrintStats() const { << " (includes alignment, etc)\n"; } -MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator = - MallocSlabAllocator(); - SlabAllocator::~SlabAllocator() { } MallocSlabAllocator::~MallocSlabAllocator() { } diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp index 5e45e46..00efe90 100644 --- a/lib/Support/BlockFrequency.cpp +++ b/lib/Support/BlockFrequency.cpp @@ -19,52 +19,69 @@ using namespace llvm; /// Multiply FREQ by N and store result in W array. -static void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) { +static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) { uint64_t u0 = freq & UINT32_MAX; uint64_t u1 = freq >> 32; - // Represent 96-bit value as w[2]:w[1]:w[0]; - uint32_t w[3] = { 0, 0, 0 }; - + // Represent 96-bit value as W[2]:W[1]:W[0]; uint64_t t = u0 * N; uint64_t k = t >> 32; - w[0] = t; + W[0] = t; t = u1 * N + k; - w[1] = t; - w[2] = t >> 32; - - // W[1] - higher bits. - // W[0] - lower bits. - W[0] = w[0] + ((uint64_t) w[1] << 32); - W[1] = w[2]; + W[1] = t; + W[2] = t >> 32; } - -/// Divide 96-bit value stored in W array by D. -/// Return 64-bit quotient, saturated to UINT64_MAX on overflow. -static uint64_t div96bit(uint64_t W[2], uint32_t D) { - uint64_t y = W[0]; - uint64_t x = W[1]; - unsigned i; - - assert(x != 0 && "This is really a 64-bit division"); - - // This long division algorithm automatically saturates on overflow. - for (i = 0; i < 64 && x; ++i) { - uint32_t t = -((x >> 31) & 1); // Splat bit 31 to bits 0-31. - x = (x << 1) | (y >> 63); - y = y << 1; - if ((x | t) >= D) { - x -= D; - ++y; +/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a +/// 32 bit unsigned integer, we can use a short division algorithm. +static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) { + // We assume that W[2] is non-zero since if W[2] is not then the user should + // just use hardware division. + assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is " + "zero, the caller should just use 64/32 hardware."); + uint32_t Q[3] = { 0, 0, 0 }; + + // The generalized short division algorithm sets i to m + n - 1, where n is + // the number of words in the divisior and m is the number of words by which + // the divident exceeds the divisor (i.e. m + n == the length of the dividend + // in words). Due to our assumption that W[2] is non-zero, we know that the + // dividend is of length 3 implying since n is 1 that m = 2. Thus we set i to + // m + n - 1 = 2 + 1 - 1 = 2. + uint32_t R = 0; + for (int i = 2; i >= 0; --i) { + uint64_t PartialD = uint64_t(R) << 32 | W[i]; + if (PartialD == 0) { + Q[i] = 0; + R = 0; + } else if (PartialD < D) { + Q[i] = 0; + R = uint32_t(PartialD); + } else if (PartialD == D) { + Q[i] = 1; + R = 0; + } else { + Q[i] = uint32_t(PartialD / D); + R = uint32_t(PartialD - (Q[i] * D)); } } - return y << (64 - i); -} + // If Q[2] is non-zero, then we overflowed. + uint64_t Result; + if (Q[2]) { + Result = UINT64_MAX; + R = D; + } else { + // Form the final uint64_t result, avoiding endianness issues. + Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32); + } + + if (Rout) + *Rout = R; + return Result; +} -void BlockFrequency::scale(uint32_t N, uint32_t D) { +uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) { assert(D != 0 && "Division by zero"); // Calculate Frequency * N. @@ -75,15 +92,16 @@ void BlockFrequency::scale(uint32_t N, uint32_t D) { // If the product fits in 64 bits, just use built-in division. if (MulHi <= UINT32_MAX && MulRes >= MulLo) { Frequency = MulRes / D; - return; + return MulRes % D; } // Product overflowed, use 96-bit operations. - // 96-bit value represented as W[1]:W[0]. - uint64_t W[2]; + // 96-bit value represented as W[2]:W[1]:W[0]. + uint32_t W[3]; + uint32_t R; mult96bit(Frequency, N, W); - Frequency = div96bit(W, D); - return; + Frequency = divrem96bit(W, D, &R); + return R; } BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { @@ -127,6 +145,10 @@ BlockFrequency::operator+(const BlockFrequency &Prob) const { return Freq; } +uint32_t BlockFrequency::scale(const BranchProbability &Prob) { + return scale(Prob.getNumerator(), Prob.getDenominator()); +} + void BlockFrequency::print(raw_ostream &OS) const { // Convert fixed-point number to decimal. OS << Frequency / getEntryFrequency() << "."; diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 5823836..3aecf3f 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -54,6 +54,7 @@ add_llvm_library(LLVMSupport ToolOutputFile.cpp Triple.cpp Twine.cpp + Unicode.cpp YAMLParser.cpp YAMLTraits.cpp raw_os_ostream.cpp diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index a47af27..44a88d8 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -60,6 +60,8 @@ TEMPLATE_INSTANTIATION(class opt<char>); TEMPLATE_INSTANTIATION(class opt<bool>); } } // end namespace llvm::cl +// Pin the vtables to this file. +void GenericOptionValue::anchor() {} void OptionValue<boolOrDefault>::anchor() {} void OptionValue<std::string>::anchor() {} void Option::anchor() {} @@ -73,6 +75,7 @@ void parser<double>::anchor() {} void parser<float>::anchor() {} void parser<std::string>::anchor() {} void parser<char>::anchor() {} +void StringSaver::anchor() {} //===----------------------------------------------------------------------===// diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp index fd8a874..b5ddb70 100644 --- a/lib/Support/Compression.cpp +++ b/lib/Support/Compression.cpp @@ -81,6 +81,10 @@ zlib::Status zlib::uncompress(StringRef InputBuffer, return Res; } +uint32_t zlib::crc32(StringRef Buffer) { + return ::crc32(0, (const Bytef *)Buffer.data(), Buffer.size()); +} + #else bool zlib::isAvailable() { return false; } zlib::Status zlib::compress(StringRef InputBuffer, @@ -93,5 +97,8 @@ zlib::Status zlib::uncompress(StringRef InputBuffer, size_t UncompressedSize) { return zlib::StatusUnsupported; } +uint32_t zlib::crc32(StringRef Buffer) { + llvm_unreachable("zlib::crc32 is unavailable"); +} #endif diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index bb38cd1..265b6e9 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -144,9 +144,6 @@ bool ConstantRange::isSignWrappedSet() const { /// getSetSize - Return the number of elements in this set. /// APInt ConstantRange::getSetSize() const { - if (isEmptySet()) - return APInt(getBitWidth()+1, 0); - if (isFullSet()) { APInt Size(getBitWidth()+1, 0); Size.setBit(getBitWidth()); @@ -448,6 +445,11 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { unsigned SrcTySize = getBitWidth(); assert(SrcTySize < DstTySize && "Not a value extension"); + + // special case: [X, INT_MIN) -- not really wrapping around + if (Upper.isMinSignedValue()) + return ConstantRange(Lower.sext(DstTySize), Upper.zext(DstTySize)); + if (isFullSet() || isSignWrappedSet()) { return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1), APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1); diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index d2a3895..92c370d 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/ThreadLocal.h" #include <cstdio> @@ -21,7 +22,7 @@ namespace { struct CrashRecoveryContextImpl; -static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext; +static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext; struct CrashRecoveryContextImpl { CrashRecoveryContext *CRC; @@ -34,11 +35,11 @@ public: CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC), Failed(false), SwitchedThread(false) { - CurrentContext.set(this); + CurrentContext->set(this); } ~CrashRecoveryContextImpl() { if (!SwitchedThread) - CurrentContext.erase(); + CurrentContext->erase(); } /// \brief Called when the separate crash-recovery thread was finished, to @@ -48,7 +49,7 @@ public: void HandleCrash() { // Eliminate the current context entry, to avoid re-entering in case the // cleanup code crashes. - CurrentContext.erase(); + CurrentContext->erase(); assert(!Failed && "Crash recovery context already failed!"); Failed = true; @@ -62,10 +63,10 @@ public: } -static sys::Mutex gCrashRecoveryContexMutex; +static ManagedStatic<sys::Mutex> gCrashRecoveryContextMutex; static bool gCrashRecoveryEnabled = false; -static sys::ThreadLocal<const CrashRecoveryContextCleanup> +static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContextCleanup> > tlIsRecoveringFromCrash; CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} @@ -73,7 +74,7 @@ CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} CrashRecoveryContext::~CrashRecoveryContext() { // Reclaim registered resources. CrashRecoveryContextCleanup *i = head; - tlIsRecoveringFromCrash.set(head); + tlIsRecoveringFromCrash->set(head); while (i) { CrashRecoveryContextCleanup *tmp = i; i = tmp->next; @@ -81,21 +82,21 @@ CrashRecoveryContext::~CrashRecoveryContext() { tmp->recoverResources(); delete tmp; } - tlIsRecoveringFromCrash.erase(); + tlIsRecoveringFromCrash->erase(); CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; delete CRCI; } bool CrashRecoveryContext::isRecoveringFromCrash() { - return tlIsRecoveringFromCrash.get() != 0; + return tlIsRecoveringFromCrash->get() != 0; } CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { if (!gCrashRecoveryEnabled) return 0; - const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); + const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); if (!CRCI) return 0; @@ -154,7 +155,7 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) { // Lookup the current thread local recovery object. - const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); + const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); if (!CRCI) { // Something has gone horribly wrong, so let's just tell everyone @@ -182,7 +183,7 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) static sys::ThreadLocal<const void> sCurrentExceptionHandle; void CrashRecoveryContext::Enable() { - sys::ScopedLock L(gCrashRecoveryContexMutex); + sys::ScopedLock L(*gCrashRecoveryContextMutex); if (gCrashRecoveryEnabled) return; @@ -198,7 +199,7 @@ void CrashRecoveryContext::Enable() { } void CrashRecoveryContext::Disable() { - sys::ScopedLock L(gCrashRecoveryContexMutex); + sys::ScopedLock L(*gCrashRecoveryContextMutex); if (!gCrashRecoveryEnabled) return; @@ -236,7 +237,7 @@ static struct sigaction PrevActions[NumSignals]; static void CrashRecoverySignalHandler(int Signal) { // Lookup the current thread local recovery object. - const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); + const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); if (!CRCI) { // We didn't find a crash recovery context -- this means either we got a @@ -267,7 +268,7 @@ static void CrashRecoverySignalHandler(int Signal) { } void CrashRecoveryContext::Enable() { - sys::ScopedLock L(gCrashRecoveryContexMutex); + sys::ScopedLock L(*gCrashRecoveryContextMutex); if (gCrashRecoveryEnabled) return; @@ -286,7 +287,7 @@ void CrashRecoveryContext::Enable() { } void CrashRecoveryContext::Disable() { - sys::ScopedLock L(gCrashRecoveryContexMutex); + sys::ScopedLock L(*gCrashRecoveryContextMutex); if (!gCrashRecoveryEnabled) return; diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index 8a80139..c000b63 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" + using namespace llvm; using namespace dwarf; @@ -59,8 +61,8 @@ const char *llvm::dwarf::TagString(unsigned Tag) { case DW_TAG_namelist_item: return "DW_TAG_namelist_item"; case DW_TAG_packed_type: return "DW_TAG_packed_type"; case DW_TAG_subprogram: return "DW_TAG_subprogram"; - case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter"; - case DW_TAG_template_value_parameter:return "DW_TAG_template_value_parameter"; + case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter"; + case DW_TAG_template_value_parameter: return "DW_TAG_template_value_parameter"; case DW_TAG_thrown_type: return "DW_TAG_thrown_type"; case DW_TAG_try_block: return "DW_TAG_try_block"; case DW_TAG_variant_part: return "DW_TAG_variant_part"; @@ -454,10 +456,11 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { case DW_OP_bit_piece: return "DW_OP_bit_piece"; case DW_OP_implicit_value: return "DW_OP_implicit_value"; case DW_OP_stack_value: return "DW_OP_stack_value"; - case DW_OP_lo_user: return "DW_OP_lo_user"; - case DW_OP_hi_user: return "DW_OP_hi_user"; - // DWARF5 Fission Proposal Op Extensions + // GNU thread-local storage + case DW_OP_GNU_push_tls_address: return "DW_OP_GNU_push_tls_address"; + + // DWARF5 Fission Proposal Op Extensions case DW_OP_GNU_addr_index: return "DW_OP_GNU_addr_index"; case DW_OP_GNU_const_index: return "DW_OP_GNU_const_index"; } @@ -723,3 +726,51 @@ const char *llvm::dwarf::CallFrameString(unsigned Encoding) { } return 0; } + +const char *llvm::dwarf::AtomTypeString(unsigned AT) { + switch (AT) { + case dwarf::DW_ATOM_null: + return "DW_ATOM_null"; + case dwarf::DW_ATOM_die_offset: + return "DW_ATOM_die_offset"; + case DW_ATOM_cu_offset: + return "DW_ATOM_cu_offset"; + case DW_ATOM_die_tag: + return "DW_ATOM_die_tag"; + case DW_ATOM_type_flags: + return "DW_ATOM_type_flags"; + } + return 0; +} + +const char *llvm::dwarf::GDBIndexEntryKindString(GDBIndexEntryKind Kind) { + switch (Kind) { + case GIEK_NONE: + return "NONE"; + case GIEK_TYPE: + return "TYPE"; + case GIEK_VARIABLE: + return "VARIABLE"; + case GIEK_FUNCTION: + return "FUNCTION"; + case GIEK_OTHER: + return "OTHER"; + case GIEK_UNUSED5: + return "UNUSED5"; + case GIEK_UNUSED6: + return "UNUSED6"; + case GIEK_UNUSED7: + return "UNUSED7"; + } + llvm_unreachable("Unknown GDBIndexEntryKind value"); +} + +const char *llvm::dwarf::GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage) { + switch (Linkage) { + case GIEL_EXTERNAL: + return "EXTERNAL"; + case GIEL_STATIC: + return "STATIC"; + } + llvm_unreachable("Unknown GDBIndexEntryLinkage value"); +} diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index f14cb45..a825c68 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -14,39 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/Config/config.h" #include "llvm/Support/Mutex.h" +#include "llvm-c/Support.h" #include <cstdio> #include <cstring> // Collection of symbol name/value pairs to be searched prior to any libraries. -static llvm::StringMap<void *> *ExplicitSymbols = 0; - -namespace { - -struct ExplicitSymbolsDeleter { - ~ExplicitSymbolsDeleter() { - delete ExplicitSymbols; - } -}; - -} - -static ExplicitSymbolsDeleter Dummy; - - -static llvm::sys::SmartMutex<true>& getMutex() { - static llvm::sys::SmartMutex<true> HandlesMutex; - return HandlesMutex; -} +static llvm::ManagedStatic<llvm::StringMap<void *> > ExplicitSymbols; +static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > SymbolsMutex; void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName, void *symbolValue) { - SmartScopedLock<true> lock(getMutex()); - if (ExplicitSymbols == 0) - ExplicitSymbols = new StringMap<void*>(); + SmartScopedLock<true> lock(*SymbolsMutex); (*ExplicitSymbols)[symbolName] = symbolValue; } @@ -72,7 +55,7 @@ static DenseSet<void *> *OpenedHandles = 0; DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, std::string *errMsg) { - SmartScopedLock<true> lock(getMutex()); + SmartScopedLock<true> lock(*SymbolsMutex); void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL); if (handle == 0) { @@ -126,10 +109,10 @@ void *SearchForAddressOfSpecialSymbol(const char* symbolName); } void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { - SmartScopedLock<true> Lock(getMutex()); + SmartScopedLock<true> Lock(*SymbolsMutex); // First check symbols added via AddSymbol(). - if (ExplicitSymbols) { + if (ExplicitSymbols.isConstructed()) { StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName); if (i != ExplicitSymbols->end()) @@ -187,3 +170,11 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { } #endif // LLVM_ON_WIN32 + +//===----------------------------------------------------------------------===// +// C API. +//===----------------------------------------------------------------------===// + +LLVMBool LLVMLoadLibraryPermanently(const char* Filename) { + return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename); +} diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp index ed17f60..1eefa3e 100644 --- a/lib/Support/Errno.cpp +++ b/lib/Support/Errno.cpp @@ -39,28 +39,27 @@ std::string StrError(int errnum) { char buffer[MaxErrStrLen]; buffer[0] = '\0'; std::string str; + if (errnum == 0) + return str; + #ifdef HAVE_STRERROR_R // strerror_r is thread-safe. - if (errnum) -# if defined(__GLIBC__) && defined(_GNU_SOURCE) - // glibc defines its own incompatible version of strerror_r - // which may not use the buffer supplied. - str = strerror_r(errnum,buffer,MaxErrStrLen-1); -# else - strerror_r(errnum,buffer,MaxErrStrLen-1); - str = buffer; -# endif +#if defined(__GLIBC__) && defined(_GNU_SOURCE) + // glibc defines its own incompatible version of strerror_r + // which may not use the buffer supplied. + str = strerror_r(errnum, buffer, MaxErrStrLen - 1); +#else + strerror_r(errnum, buffer, MaxErrStrLen - 1); + str = buffer; +#endif #elif HAVE_DECL_STRERROR_S // "Windows Secure API" - if (errnum) { - strerror_s(buffer, MaxErrStrLen - 1, errnum); - str = buffer; - } + strerror_s(buffer, MaxErrStrLen - 1, errnum); + str = buffer; #elif defined(HAVE_STRERROR) // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact // of collision of strerror in multiple threads. - if (errnum) - str = strerror(errnum); + str = strerror(errnum); #else // Strange that this system doesn't even have strerror // but, oh well, just use a generic message diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 9425445..1eafb96 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" +#include "llvm-c/Core.h" #include <cassert> #include <cstdlib> @@ -102,3 +103,19 @@ void llvm::llvm_unreachable_internal(const char *msg, const char *file, LLVM_BUILTIN_UNREACHABLE; #endif } + +static void bindingsErrorHandler(void *user_data, const std::string& reason, + bool gen_crash_diag) { + LLVMFatalErrorHandler handler = + LLVM_EXTENSION reinterpret_cast<LLVMFatalErrorHandler>(user_data); + handler(reason.c_str()); +} + +void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) { + install_fatal_error_handler(bindingsErrorHandler, + LLVM_EXTENSION reinterpret_cast<void *>(Handler)); +} + +void LLVMResetFatalErrorHandler() { + remove_fatal_error_handler(); +} diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index 7a9400d..85be415 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -219,5 +219,8 @@ void llvm::DisplayGraph(StringRef FilenameRef, bool wait, errs() << "Running 'dotty' program... "; if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg)) return; +#else + (void)Filename; + (void)ErrMsg; #endif } diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 90e4389..6e9a5c9 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -52,8 +52,54 @@ using namespace llvm; /// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the /// specified arguments. If we can't run cpuid on the host, return true. -static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, + unsigned *rECX, unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) + #if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. + asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; +// pedantic #else returns to appease -Wunreachable-code (so we don't generate +// postprocessed code that looks like "return true; return false;") + #else + return true; + #endif +#elif defined(_MSC_VER) + // The MSVC intrinsic is portable across x86 and x64. + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; +#else + return true; +#endif +} + +/// GetX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the +/// 4 values in the specified arguments. If we can't run cpuid on the host, +/// return true. +bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { #if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) #if defined(__GNUC__) // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. @@ -64,16 +110,22 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, "=S" (*rEBX), "=c" (*rECX), "=d" (*rEDX) - : "a" (value)); + : "a" (value), + "c" (subleaf)); return false; #elif defined(_MSC_VER) - int registers[4]; - __cpuid(registers, value); - *rEAX = registers[0]; - *rEBX = registers[1]; - *rECX = registers[2]; - *rEDX = registers[3]; - return false; + // __cpuidex was added in MSVC++ 9.0 SP1 + #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729) + int registers[4]; + __cpuidex(registers, value, subleaf); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; + #else + return true; + #endif #else return true; #endif @@ -86,11 +138,13 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, "=S" (*rEBX), "=c" (*rECX), "=d" (*rEDX) - : "a" (value)); + : "a" (value), + "c" (subleaf)); return false; #elif defined(_MSC_VER) __asm { mov eax,value + mov ecx,subleaf cpuid mov esi,rEAX mov dword ptr [esi],eax @@ -102,8 +156,6 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, mov dword ptr [esi],edx } return false; -// pedantic #else returns to appease -Wunreachable-code (so we don't generate -// postprocessed code that looks like "return true; return false;") #else return true; #endif @@ -148,6 +200,14 @@ std::string sys::getHostCPUName() { unsigned Model = 0; DetectX86FamilyModel(EAX, Family, Model); + union { + unsigned u[3]; + char c[12]; + } text; + + GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); + + unsigned MaxLeaf = EAX; bool HasSSE3 = (ECX & 0x1); bool HasSSE41 = (ECX & 0x80000); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV @@ -155,15 +215,12 @@ std::string sys::getHostCPUName() { // switch, then we have full AVX support. const unsigned AVXBits = (1 << 27) | (1 << 28); bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport(); + bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 && + !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) && + (EBX & 0x20); GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; - union { - unsigned u[3]; - char c[12]; - } text; - - GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); if (memcmp(text.c, "GenuineIntel", 12) == 0) { switch (Family) { case 3: @@ -271,10 +328,20 @@ std::string sys::getHostCPUName() { // Ivy Bridge: case 58: + case 62: // Ivy Bridge EP // Not all Ivy Bridge processors support AVX (such as the Pentium // versions instead of the i7 versions). return HasAVX ? "core-avx-i" : "corei7"; + // Haswell: + case 60: + case 63: + case 69: + case 70: + // Not all Haswell processors support AVX too (such as the Pentium + // versions instead of the i7 versions). + return HasAVX2 ? "core-avx2" : "corei7"; + case 28: // Most 45 nm Intel Atom processors case 38: // 45 nm Atom Lincroft case 39: // 32 nm Atom Medfield @@ -282,6 +349,12 @@ std::string sys::getHostCPUName() { case 54: // 32 nm Atom Midview return "atom"; + // Atom Silvermont codes from the Intel software optimization guide. + case 55: + case 74: + case 77: + return "slm"; + default: return (Em64T) ? "x86-64" : "i686"; } case 15: { @@ -359,9 +432,11 @@ std::string sys::getHostCPUName() { case 21: if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. return "btver1"; - if (Model > 15 && Model <= 31) - return "bdver2"; - return "bdver1"; + if (Model >= 0x30) + return "bdver3"; // 30h-3Fh: Steamroller + if (Model >= 0x10) + return "bdver2"; // 10h-1Fh: Piledriver + return "bdver1"; // 00h-0Fh: Bulldozer case 22: if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. return "btver1"; @@ -546,6 +621,48 @@ std::string sys::getHostCPUName() { return "generic"; } +#elif defined(__linux__) && defined(__s390x__) +std::string sys::getHostCPUName() { + // STIDP is a privileged operation, so use /proc/cpuinfo instead. + // Note: We cannot mmap /proc/cpuinfo here and then process the resulting + // memory buffer because the 'file' has 0 size (it can be read from only + // as a stream). + + std::string Err; + DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err); + if (!DS) { + DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n"); + return "generic"; + } + + // The "processor 0:" line comes after a fair amount of other information, + // including a cache breakdown, but this should be plenty. + char buffer[2048]; + size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer)); + delete DS; + + StringRef Str(buffer, CPUInfoSize); + SmallVector<StringRef, 32> Lines; + Str.split(Lines, "\n"); + for (unsigned I = 0, E = Lines.size(); I != E; ++I) { + if (Lines[I].startswith("processor ")) { + size_t Pos = Lines[I].find("machine = "); + if (Pos != StringRef::npos) { + Pos += sizeof("machine = ") - 1; + unsigned int Id; + if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { + if (Id >= 2827) + return "zEC12"; + if (Id >= 2817) + return "z196"; + } + } + break; + } + } + + return "generic"; +} #else std::string sys::getHostCPUName() { return "generic"; diff --git a/lib/Support/Locale.cpp b/lib/Support/Locale.cpp index 17b9b6c..35ddf7f 100644 --- a/lib/Support/Locale.cpp +++ b/lib/Support/Locale.cpp @@ -1,10 +1,31 @@ #include "llvm/Support/Locale.h" -#include "llvm/Config/config.h" +#include "llvm/Support/Unicode.h" -#ifdef __APPLE__ -#include "LocaleXlocale.inc" -#elif LLVM_ON_WIN32 -#include "LocaleWindows.inc" +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef Text) { +#if LLVM_ON_WIN32 + return Text.size(); #else -#include "LocaleGeneric.inc" + return llvm::sys::unicode::columnWidthUTF8(Text); #endif +} + +bool isPrint(int UCS) { +#if LLVM_ON_WIN32 + // Restrict characters that we'll try to print to the the lower part of ASCII + // except for the control characters (0x20 - 0x7E). In general one can not + // reliably output code points U+0080 and higher using narrow character C/C++ + // output functions in Windows, because the meaning of the upper 128 codes is + // determined by the active code page in the console. + return ' ' <= UCS && UCS <= '~'; +#else + return llvm::sys::unicode::isPrintable(UCS); +#endif +} + +} // namespace locale +} // namespace sys +} // namespace llvm diff --git a/lib/Support/LocaleGeneric.inc b/lib/Support/LocaleGeneric.inc deleted file mode 100644 index 3a939b8..0000000 --- a/lib/Support/LocaleGeneric.inc +++ /dev/null @@ -1,382 +0,0 @@ -//===- llvm/Support/LocaleGeneric.inc - Locale-dependent stuff -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements llvm::sys::locale::columnWidth and -// llvm::sys::locale::isPrint functions for UTF-8 locales. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/ConvertUTF.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/UnicodeCharRanges.h" - -namespace llvm { -namespace sys { -namespace locale { - -enum ColumnWidthErrors { - ErrorInvalidUTF8 = -2, - ErrorNonPrintableCharacter = -1 -}; - -/// Determines if a character is likely to be displayed correctly on the -/// terminal. Exact implementation would have to depend on the specific -/// terminal, so we define the semantic that should be suitable for generic case -/// of a terminal capable to output Unicode characters. -/// All characters from the Unicode codepoint range are considered printable -/// except for: -/// * C0 and C1 control character ranges; -/// * default ignorable code points as per 5.21 of -/// http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf -/// * format characters (category = Cf); -/// * surrogates (category = Cs); -/// * unassigned characters (category = Cn). -/// \return true if the character is considered printable. -bool isPrint(int UCS) { - // Sorted list of non-overlapping intervals of code points that are not - // supposed to be printable. - static const UnicodeCharRange NonPrintableRanges[] = { - { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x00AD, 0x00AD }, - { 0x034F, 0x034F }, { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, - { 0x038B, 0x038B }, { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, - { 0x0528, 0x0530 }, { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, - { 0x0588, 0x0588 }, { 0x058B, 0x058E }, { 0x0590, 0x0590 }, - { 0x05C8, 0x05CF }, { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, - { 0x061C, 0x061D }, { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, - { 0x074B, 0x074C }, { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, - { 0x082E, 0x082F }, { 0x083F, 0x083F }, { 0x085C, 0x085D }, - { 0x085F, 0x089F }, { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, - { 0x08FF, 0x08FF }, { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, - { 0x0984, 0x0984 }, { 0x098D, 0x098E }, { 0x0991, 0x0992 }, - { 0x09A9, 0x09A9 }, { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, - { 0x09BA, 0x09BB }, { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, - { 0x09CF, 0x09D6 }, { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, - { 0x09E4, 0x09E5 }, { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, - { 0x0A0B, 0x0A0E }, { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, - { 0x0A31, 0x0A31 }, { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, - { 0x0A3A, 0x0A3B }, { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, - { 0x0A49, 0x0A4A }, { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, - { 0x0A5D, 0x0A5D }, { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, - { 0x0A84, 0x0A84 }, { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, - { 0x0AA9, 0x0AA9 }, { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, - { 0x0ABA, 0x0ABB }, { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, - { 0x0ACE, 0x0ACF }, { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, - { 0x0AF2, 0x0B00 }, { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, - { 0x0B11, 0x0B12 }, { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, - { 0x0B34, 0x0B34 }, { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, - { 0x0B49, 0x0B4A }, { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, - { 0x0B5E, 0x0B5E }, { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, - { 0x0B84, 0x0B84 }, { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, - { 0x0B96, 0x0B98 }, { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, - { 0x0BA0, 0x0BA2 }, { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, - { 0x0BBA, 0x0BBD }, { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, - { 0x0BCE, 0x0BCF }, { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, - { 0x0BFB, 0x0C00 }, { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, - { 0x0C11, 0x0C11 }, { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, - { 0x0C3A, 0x0C3C }, { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, - { 0x0C4E, 0x0C54 }, { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, - { 0x0C64, 0x0C65 }, { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, - { 0x0C84, 0x0C84 }, { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, - { 0x0CA9, 0x0CA9 }, { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, - { 0x0CC5, 0x0CC5 }, { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, - { 0x0CD7, 0x0CDD }, { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, - { 0x0CF0, 0x0CF0 }, { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, - { 0x0D0D, 0x0D0D }, { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, - { 0x0D45, 0x0D45 }, { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, - { 0x0D58, 0x0D5F }, { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, - { 0x0D80, 0x0D81 }, { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, - { 0x0DB2, 0x0DB2 }, { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, - { 0x0DC7, 0x0DC9 }, { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, - { 0x0DD7, 0x0DD7 }, { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, - { 0x0E3B, 0x0E3E }, { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, - { 0x0E85, 0x0E86 }, { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, - { 0x0E8E, 0x0E93 }, { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, - { 0x0EA4, 0x0EA4 }, { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, - { 0x0EAC, 0x0EAC }, { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, - { 0x0EC5, 0x0EC5 }, { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, - { 0x0EDA, 0x0EDB }, { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, - { 0x0F6D, 0x0F70 }, { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, - { 0x0FCD, 0x0FCD }, { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, - { 0x10C8, 0x10CC }, { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, - { 0x1249, 0x1249 }, { 0x124E, 0x124F }, { 0x1257, 0x1257 }, - { 0x1259, 0x1259 }, { 0x125E, 0x125F }, { 0x1289, 0x1289 }, - { 0x128E, 0x128F }, { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, - { 0x12BF, 0x12BF }, { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, - { 0x12D7, 0x12D7 }, { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, - { 0x135B, 0x135C }, { 0x137D, 0x137F }, { 0x139A, 0x139F }, - { 0x13F5, 0x13FF }, { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, - { 0x170D, 0x170D }, { 0x1715, 0x171F }, { 0x1737, 0x173F }, - { 0x1754, 0x175F }, { 0x176D, 0x176D }, { 0x1771, 0x1771 }, - { 0x1774, 0x177F }, { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, - { 0x17EA, 0x17EF }, { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, - { 0x180F, 0x180F }, { 0x181A, 0x181F }, { 0x1878, 0x187F }, - { 0x18AB, 0x18AF }, { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, - { 0x192C, 0x192F }, { 0x193C, 0x193F }, { 0x1941, 0x1943 }, - { 0x196E, 0x196F }, { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, - { 0x19CA, 0x19CF }, { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, - { 0x1A5F, 0x1A5F }, { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, - { 0x1A9A, 0x1A9F }, { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, - { 0x1B7D, 0x1B7F }, { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, - { 0x1C4A, 0x1C4C }, { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, - { 0x1CF7, 0x1CFF }, { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, - { 0x1F1E, 0x1F1F }, { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, - { 0x1F58, 0x1F58 }, { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, - { 0x1F5E, 0x1F5E }, { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, - { 0x1FC5, 0x1FC5 }, { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, - { 0x1FF0, 0x1FF1 }, { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, - { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x206F }, - { 0x2072, 0x2073 }, { 0x208F, 0x208F }, { 0x209D, 0x209F }, - { 0x20BB, 0x20CF }, { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, - { 0x23F4, 0x23FF }, { 0x2427, 0x243F }, { 0x244B, 0x245F }, - { 0x2700, 0x2700 }, { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, - { 0x2C2F, 0x2C2F }, { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, - { 0x2D26, 0x2D26 }, { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, - { 0x2D68, 0x2D6E }, { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, - { 0x2DA7, 0x2DA7 }, { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, - { 0x2DBF, 0x2DBF }, { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, - { 0x2DD7, 0x2DD7 }, { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, - { 0x2E9A, 0x2E9A }, { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, - { 0x2FFC, 0x2FFF }, { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, - { 0x3100, 0x3104 }, { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, - { 0x318F, 0x318F }, { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, - { 0x321F, 0x321F }, { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, - { 0x9FCD, 0x9FFF }, { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, - { 0xA62C, 0xA63F }, { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, - { 0xA78F, 0xA78F }, { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, - { 0xA82C, 0xA82F }, { 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, - { 0xA8C5, 0xA8CD }, { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, - { 0xA954, 0xA95E }, { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, - { 0xA9DA, 0xA9DD }, { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, - { 0xAA4E, 0xAA4F }, { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, - { 0xAAC3, 0xAADA }, { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, - { 0xAB0F, 0xAB10 }, { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, - { 0xAB2F, 0xABBF }, { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, - { 0xD7A4, 0xD7AF }, { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, - { 0xFA6E, 0xFA6F }, { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, - { 0xFB18, 0xFB1C }, { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, - { 0xFB3F, 0xFB3F }, { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, - { 0xFBC2, 0xFBD2 }, { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, - { 0xFDC8, 0xFDEF }, { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, - { 0xFE27, 0xFE2F }, { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, - { 0xFE6C, 0xFE6F }, { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, - { 0xFF00, 0xFF00 }, { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, - { 0xFFC8, 0xFFC9 }, { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, - { 0xFFDD, 0xFFDF }, { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, - { 0xFFFE, 0xFFFF }, { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, - { 0x1003B, 0x1003B }, { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, - { 0x1005E, 0x1007F }, { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, - { 0x10134, 0x10136 }, { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, - { 0x101FE, 0x1027F }, { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, - { 0x1031F, 0x1031F }, { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, - { 0x1039E, 0x1039E }, { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, - { 0x1049E, 0x1049F }, { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, - { 0x10809, 0x10809 }, { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, - { 0x1083D, 0x1083E }, { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, - { 0x1091C, 0x1091E }, { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, - { 0x109B8, 0x109BD }, { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, - { 0x10A07, 0x10A0B }, { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, - { 0x10A34, 0x10A37 }, { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, - { 0x10A59, 0x10A5F }, { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, - { 0x10B56, 0x10B57 }, { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, - { 0x10C49, 0x10E5F }, { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, - { 0x11070, 0x1107F }, { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, - { 0x110E9, 0x110EF }, { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, - { 0x11144, 0x1117F }, { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, - { 0x116B8, 0x116BF }, { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, - { 0x12463, 0x1246F }, { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, - { 0x16A39, 0x16EFF }, { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, - { 0x16FA0, 0x1AFFF }, { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, - { 0x1D127, 0x1D128 }, { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, - { 0x1D246, 0x1D2FF }, { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, - { 0x1D455, 0x1D455 }, { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, - { 0x1D4A3, 0x1D4A4 }, { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, - { 0x1D4BA, 0x1D4BA }, { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, - { 0x1D506, 0x1D506 }, { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, - { 0x1D51D, 0x1D51D }, { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, - { 0x1D545, 0x1D545 }, { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, - { 0x1D6A6, 0x1D6A7 }, { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, - { 0x1EE04, 0x1EE04 }, { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, - { 0x1EE25, 0x1EE26 }, { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, - { 0x1EE38, 0x1EE38 }, { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, - { 0x1EE43, 0x1EE46 }, { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, - { 0x1EE4C, 0x1EE4C }, { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, - { 0x1EE55, 0x1EE56 }, { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, - { 0x1EE5C, 0x1EE5C }, { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, - { 0x1EE63, 0x1EE63 }, { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, - { 0x1EE73, 0x1EE73 }, { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, - { 0x1EE7F, 0x1EE7F }, { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, - { 0x1EEA4, 0x1EEA4 }, { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, - { 0x1EEF2, 0x1EFFF }, { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, - { 0x1F0AF, 0x1F0B0 }, { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, - { 0x1F0E0, 0x1F0FF }, { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, - { 0x1F16C, 0x1F16F }, { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, - { 0x1F23B, 0x1F23F }, { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, - { 0x1F321, 0x1F32F }, { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, - { 0x1F394, 0x1F39F }, { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, - { 0x1F3F1, 0x1F3FF }, { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, - { 0x1F4F8, 0x1F4F8 }, { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, - { 0x1F544, 0x1F54F }, { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, - { 0x1F650, 0x1F67F }, { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, - { 0x2A6D7, 0x2A6FF }, { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, - { 0x2FA1E, 0xF0000 }, { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF } - }; - - return UCS >= 0 && UCS <= 0x10FFFF && !isCharInSet(UCS, NonPrintableRanges); -} - -/// Gets the number of positions a character is likely to occupy when output -/// on a terminal ("character width"). This depends on the implementation of the -/// terminal, and there's no standard definition of character width. -/// The implementation defines it in a way that is expected to be compatible -/// with a generic Unicode-capable terminal. -/// \return Character width: -/// * ErrorNonPrintableCharacter (-1) for non-printable characters (as -/// identified by isPrint); -/// * 0 for non-spacing and enclosing combining marks; -/// * 2 for CJK characters excluding halfwidth forms; -/// * 1 for all remaining characters. -static inline int charWidth(int UCS) -{ - if (!isPrint(UCS)) - return ErrorNonPrintableCharacter; - - // Sorted list of non-spacing and enclosing combining mark intervals as - // defined in "3.6 Combination" of - // http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf - static const UnicodeCharRange CombiningCharacters[] = { - { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD }, - { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, - { 0x05C7, 0x05C7 }, { 0x0610, 0x061A }, { 0x064B, 0x065F }, - { 0x0670, 0x0670 }, { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 }, - { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x0711, 0x0711 }, - { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, - { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, - { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08E4, 0x08FE }, - { 0x0900, 0x0902 }, { 0x093A, 0x093A }, { 0x093C, 0x093C }, - { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0957 }, - { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, - { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, - { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, - { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 }, - { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 }, - { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, - { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, - { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 }, - { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B62, 0x0B63 }, - { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, - { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, - { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 }, { 0x0CBC, 0x0CBC }, - { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, - { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D }, - { 0x0D62, 0x0D63 }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, - { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, - { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, - { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, - { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, - { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, - { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, - { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A }, - { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, - { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, - { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x135D, 0x135F }, - { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, - { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, - { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, - { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, - { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, - { 0x1A17, 0x1A18 }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, - { 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, - { 0x1A73, 0x1A7C }, { 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 }, - { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, - { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 }, - { 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAB }, - { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED }, - { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 }, - { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 }, - { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1DC0, 0x1DE6 }, - { 0x1DFC, 0x1DFF }, { 0x20D0, 0x20F0 }, { 0x2CEF, 0x2CF1 }, - { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D }, - { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D }, - { 0xA69F, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, - { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, - { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 }, { 0xA926, 0xA92D }, - { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, - { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC }, { 0xAA29, 0xAA2E }, - { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, - { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 }, - { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 }, - { 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 }, { 0xABE5, 0xABE5 }, - { 0xABE8, 0xABE8 }, { 0xABED, 0xABED }, { 0xFB1E, 0xFB1E }, - { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 }, { 0x101FD, 0x101FD }, - { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, - { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x11001, 0x11001 }, - { 0x11038, 0x11046 }, { 0x11080, 0x11081 }, { 0x110B3, 0x110B6 }, - { 0x110B9, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x1112B }, - { 0x1112D, 0x11134 }, { 0x11180, 0x11181 }, { 0x111B6, 0x111BE }, - { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD }, { 0x116B0, 0x116B5 }, - { 0x116B7, 0x116B7 }, { 0x16F8F, 0x16F92 }, { 0x1D167, 0x1D169 }, - { 0x1D17B, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, - { 0x1D242, 0x1D244 }, { 0xE0100, 0xE01EF }, - }; - - if (isCharInSet(UCS, CombiningCharacters)) - return 0; - - static const UnicodeCharRange DoubleWidthCharacters[] = { - // Hangul Jamo - { 0x1100, 0x11FF }, - // Deprecated fullwidth angle brackets - { 0x2329, 0x232A }, - // CJK Misc, CJK Unified Ideographs, Yijing Hexagrams, Yi - // excluding U+303F (IDEOGRAPHIC HALF FILL SPACE) - { 0x2E80, 0x303E }, { 0x3040, 0xA4CF }, - // Hangul - { 0xAC00, 0xD7A3 }, { 0xD7B0, 0xD7C6 }, { 0xD7CB, 0xD7FB }, - // CJK Unified Ideographs - { 0xF900, 0xFAFF }, - // Vertical forms - { 0xFE10, 0xFE19 }, - // CJK Compatibility Forms + Small Form Variants - { 0xFE30, 0xFE6F }, - // Fullwidth forms - { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 }, - // CJK Unified Ideographs - { 0x20000, 0x2A6DF }, { 0x2A700, 0x2B81F }, { 0x2F800, 0x2FA1F } - }; - - if (isCharInSet(UCS, DoubleWidthCharacters)) - return 2; - return 1; -} - -int columnWidth(StringRef Text) { - unsigned ColumnWidth = 0; - unsigned Length; - for (size_t i = 0, e = Text.size(); i < e; i += Length) { - Length = getNumBytesForUTF8(Text[i]); - if (Length <= 0 || i + Length > Text.size()) - return ErrorInvalidUTF8; - UTF32 buf[1]; - const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i); - UTF32 *Target = &buf[0]; - if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target, - Target + 1, strictConversion)) - return ErrorInvalidUTF8; - int Width = charWidth(buf[0]); - if (Width < 0) - return ErrorNonPrintableCharacter; - ColumnWidth += Width; - } - return ColumnWidth; -} - -} -} -} diff --git a/lib/Support/LocaleWindows.inc b/lib/Support/LocaleWindows.inc deleted file mode 100644 index 28e429c..0000000 --- a/lib/Support/LocaleWindows.inc +++ /dev/null @@ -1,15 +0,0 @@ -namespace llvm { -namespace sys { -namespace locale { - -int columnWidth(StringRef s) { - return s.size(); -} - -bool isPrint(int c) { - return ' ' <= c && c <= '~'; -} - -} -} -} diff --git a/lib/Support/LocaleXlocale.inc b/lib/Support/LocaleXlocale.inc deleted file mode 100644 index 389fe3d..0000000 --- a/lib/Support/LocaleXlocale.inc +++ /dev/null @@ -1,61 +0,0 @@ -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ManagedStatic.h" -#include <cassert> -#include <xlocale.h> - - -namespace { - struct locale_holder { - locale_holder() - : l(newlocale(LC_CTYPE_MASK,"en_US.UTF-8",LC_GLOBAL_LOCALE)) - { - assert(NULL!=l); - } - ~locale_holder() { - freelocale(l); - } - - int mbswidth(llvm::SmallString<16> s) const { - // this implementation assumes no '\0' in s - assert(s.size()==strlen(s.c_str())); - - size_t size = mbstowcs_l(NULL,s.c_str(),0,l); - assert(size!=(size_t)-1); - if (size==0) - return 0; - llvm::SmallVector<wchar_t,200> ws(size); - size = mbstowcs_l(&ws[0],s.c_str(),ws.size(),l); - assert(ws.size()==size); - return wcswidth_l(&ws[0],ws.size(),l); - } - - int isprint(int c) const { - return iswprint_l(c,l); - } - - private: - - locale_t l; - }; - - llvm::ManagedStatic<locale_holder> l; -} - -namespace llvm { -namespace sys { -namespace locale { - -int columnWidth(StringRef s) { - int width = l->mbswidth(s); - assert(width>=0); - return width; -} - -bool isPrint(int c) { - return l->isprint(c); -} - -} -} -} diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index cab45c7..dcd5529 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -177,7 +177,7 @@ error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename, //===----------------------------------------------------------------------===// namespace { -/// \brief Memorry maps a file descriptor using sys::fs::mapped_file_region. +/// \brief Memory maps a file descriptor using sys::fs::mapped_file_region. /// /// This handles converting the offset into a legal offset on the platform. class MemoryBufferMMapFile : public MemoryBuffer { @@ -217,7 +217,7 @@ public: }; } -static error_code getMemoryBufferForStream(int FD, +static error_code getMemoryBufferForStream(int FD, StringRef BufferName, OwningPtr<MemoryBuffer> &result) { const ssize_t ChunkSize = 4096*4; @@ -238,14 +238,19 @@ static error_code getMemoryBufferForStream(int FD, return error_code::success(); } -error_code MemoryBuffer::getFile(StringRef Filename, +static error_code getFileAux(const char *Filename, + OwningPtr<MemoryBuffer> &result, int64_t FileSize, + bool RequiresNullTerminator); + +error_code MemoryBuffer::getFile(Twine Filename, OwningPtr<MemoryBuffer> &result, int64_t FileSize, bool RequiresNullTerminator) { // Ensure the path is null terminated. - SmallString<256> PathBuf(Filename.begin(), Filename.end()); - return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize, - RequiresNullTerminator); + SmallString<256> PathBuf; + StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf); + return getFileAux(NullTerminatedName.data(), result, FileSize, + RequiresNullTerminator); } static error_code getOpenFileImpl(int FD, const char *Filename, @@ -253,10 +258,9 @@ static error_code getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize, uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator); -error_code MemoryBuffer::getFile(const char *Filename, - OwningPtr<MemoryBuffer> &result, - int64_t FileSize, - bool RequiresNullTerminator) { +static error_code getFileAux(const char *Filename, + OwningPtr<MemoryBuffer> &result, int64_t FileSize, + bool RequiresNullTerminator) { int FD; error_code EC = sys::fs::openFileForRead(Filename, FD); if (EC) @@ -276,7 +280,7 @@ static bool shouldUseMmap(int FD, int PageSize) { // We don't use mmap for small files because this can severely fragment our // address space. - if (MapSize < 4096*4) + if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize) return false; if (!RequiresNullTerminator) @@ -302,6 +306,15 @@ static bool shouldUseMmap(int FD, if (End != FileSize) return false; +#if defined(_WIN32) || defined(__CYGWIN__) + // Don't peek the next page if file is multiple of *physical* pagesize(4k) + // but is not multiple of AllocationGranularity(64k), + // when a null terminator is required. + // FIXME: It's not good to hardcode 4096 here. dwPageSize shows 4096. + if ((FileSize & (4096 - 1)) == 0) + return false; +#endif + // Don't try to map files that are exactly a multiple of the system page size // if we need a null terminator. if ((FileSize & (PageSize -1)) == 0) diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index cfd9ed6..c869b30 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -77,7 +77,7 @@ namespace { return path.substr(0, 1); // * {file,directory}name - size_t end = path.find_first_of(separators, 2); + size_t end = path.find_first_of(separators); return path.substr(0, end); } @@ -449,23 +449,18 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) { } void native(const Twine &path, SmallVectorImpl<char> &result) { + assert((!path.isSingleStringRef() || + path.getSingleStringRef().data() != result.data()) && + "path and result are not allowed to overlap!"); // Clear result. result.clear(); -#ifdef LLVM_ON_WIN32 - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - result.reserve(p.size()); - for (StringRef::const_iterator i = p.begin(), - e = p.end(); - i != e; - ++i) { - if (*i == '/') - result.push_back('\\'); - else - result.push_back(*i); - } -#else path.toVector(result); + native(result); +} + +void native(SmallVectorImpl<char> &path) { +#ifdef LLVM_ON_WIN32 + std::replace(path.begin(), path.end(), '/', '\\'); #endif } @@ -852,6 +847,21 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) { if (Magic.size() < 4) return file_magic::unknown; switch ((unsigned char)Magic[0]) { + case 0x00: { + // COFF short import library file + if (Magic[1] == (char)0x00 && Magic[2] == (char)0xff && + Magic[3] == (char)0xff) + return file_magic::coff_import_library; + // Windows resource file + const char Expected[] = { 0, 0, 0, 0, '\x20', 0, 0, 0, '\xff' }; + if (Magic.size() >= sizeof(Expected) && + memcmp(Magic.data(), Expected, sizeof(Expected)) == 0) + return file_magic::windows_resource; + // 0x0000 = COFF unknown machine type + if (Magic[1] == 0) + return file_magic::coff_object; + break; + } case 0xDE: // 0x0B17C0DE = BC wraper if (Magic[1] == (char)0xC0 && Magic[2] == (char)0x17 && Magic[3] == (char)0x0B) diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index 23ee5ab..722f4ca 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -15,10 +15,12 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Signals.h" #include "llvm/Support/ThreadLocal.h" #include "llvm/Support/Watchdog.h" #include "llvm/Support/raw_ostream.h" +#include "llvm-c/Core.h" #ifdef HAVE_CRASHREPORTERCLIENT_H #include <CrashReporterClient.h> @@ -26,12 +28,7 @@ using namespace llvm; -namespace llvm { - bool DisablePrettyStackTrace = false; -} - -// FIXME: This should be thread local when llvm supports threads. -static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead; +static ManagedStatic<sys::ThreadLocal<const PrettyStackTraceEntry> > PrettyStackTraceHead; static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){ unsigned NextID = 0; @@ -49,12 +46,12 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){ /// PrintCurStackTrace - Print the current stack trace to the specified stream. static void PrintCurStackTrace(raw_ostream &OS) { // Don't print an empty trace. - if (PrettyStackTraceHead.get() == 0) return; + if (PrettyStackTraceHead->get() == 0) return; // If there are pretty stack frames registered, walk and emit them. OS << "Stack dump:\n"; - PrintStack(PrettyStackTraceHead.get(), OS); + PrintStack(PrettyStackTraceHead->get(), OS); OS.flush(); } @@ -102,26 +99,28 @@ static void CrashHandler(void *) { #endif } -static bool RegisterCrashPrinter() { - if (!DisablePrettyStackTrace) - sys::AddSignalHandler(CrashHandler, 0); - return false; -} - PrettyStackTraceEntry::PrettyStackTraceEntry() { - // The first time this is called, we register the crash printer. - static bool HandlerRegistered = RegisterCrashPrinter(); - (void)HandlerRegistered; - // Link ourselves. - NextEntry = PrettyStackTraceHead.get(); - PrettyStackTraceHead.set(this); + NextEntry = PrettyStackTraceHead->get(); + PrettyStackTraceHead->set(this); } PrettyStackTraceEntry::~PrettyStackTraceEntry() { - assert(PrettyStackTraceHead.get() == this && + // Do nothing if PrettyStackTraceHead is uninitialized. This can only happen + // if a shutdown occurred after we created the PrettyStackTraceEntry. That + // does occur in the following idiom: + // + // PrettyStackTraceProgram X(...); + // llvm_shutdown_obj Y; + // + // Without this check, we may end up removing ourselves from the stack trace + // after PrettyStackTraceHead has already been destroyed. + if (!PrettyStackTraceHead.isConstructed()) + return; + + assert(PrettyStackTraceHead->get() == this && "Pretty stack trace entry destruction is out of order"); - PrettyStackTraceHead.set(getNextEntry()); + PrettyStackTraceHead->set(getNextEntry()); } void PrettyStackTraceString::print(raw_ostream &OS) const { @@ -135,3 +134,18 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const { OS << ArgV[i] << ' '; OS << '\n'; } + +static bool RegisterCrashPrinter() { + sys::AddSignalHandler(CrashHandler, 0); + return false; +} + +void llvm::EnablePrettyStackTrace() { + // The first time this is called, we register the crash printer. + static bool HandlerRegistered = RegisterCrashPrinter(); + (void)HandlerRegistered; +} + +void LLVMEnablePrettyStackTrace() { + EnablePrettyStackTrace(); +} diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index 2c0d37b..d5168f0 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -80,6 +80,24 @@ TimeValue self_process::get_wall_time() const { #endif +#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m" + +#define ALLCOLORS(FGBG,BOLD) {\ + COLOR(FGBG, "0", BOLD),\ + COLOR(FGBG, "1", BOLD),\ + COLOR(FGBG, "2", BOLD),\ + COLOR(FGBG, "3", BOLD),\ + COLOR(FGBG, "4", BOLD),\ + COLOR(FGBG, "5", BOLD),\ + COLOR(FGBG, "6", BOLD),\ + COLOR(FGBG, "7", BOLD)\ + } + +static const char colorcodes[2][2][8][10] = { + { ALLCOLORS("3",""), ALLCOLORS("3","1;") }, + { ALLCOLORS("4",""), ALLCOLORS("4","1;") } +}; + // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Process.inc" diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index 79f7e5f..83f2ec4 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -22,30 +22,40 @@ using namespace sys; //=== independent code. //===----------------------------------------------------------------------===// -static bool Execute(void **Data, StringRef Program, const char **args, +static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, const char **env, const StringRef **Redirects, unsigned memoryLimit, std::string *ErrMsg); -static int Wait(void *&Data, StringRef Program, unsigned secondsToWait, - std::string *ErrMsg); - int sys::ExecuteAndWait(StringRef Program, const char **args, const char **envp, const StringRef **redirects, unsigned secondsToWait, unsigned memoryLimit, std::string *ErrMsg, bool *ExecutionFailed) { - void *Data = 0; - if (Execute(&Data, Program, args, envp, redirects, memoryLimit, ErrMsg)) { - if (ExecutionFailed) *ExecutionFailed = false; - return Wait(Data, Program, secondsToWait, ErrMsg); + ProcessInfo PI; + if (Execute(PI, Program, args, envp, redirects, memoryLimit, ErrMsg)) { + if (ExecutionFailed) + *ExecutionFailed = false; + ProcessInfo Result = Wait(PI, secondsToWait, true, ErrMsg); + return Result.ReturnCode; } - if (ExecutionFailed) *ExecutionFailed = true; + + if (ExecutionFailed) + *ExecutionFailed = true; + return -1; } -void sys::ExecuteNoWait(StringRef Program, const char **args, const char **envp, - const StringRef **redirects, unsigned memoryLimit, - std::string *ErrMsg) { - Execute(/*Data*/ 0, Program, args, envp, redirects, memoryLimit, ErrMsg); +ProcessInfo sys::ExecuteNoWait(StringRef Program, const char **args, + const char **envp, const StringRef **redirects, + unsigned memoryLimit, std::string *ErrMsg, + bool *ExecutionFailed) { + ProcessInfo PI; + if (ExecutionFailed) + *ExecutionFailed = false; + if (!Execute(PI, Program, args, envp, redirects, memoryLimit, ErrMsg)) + if (ExecutionFailed) + *ExecutionFailed = true; + + return PI; } // Include the platform-specific parts of this class. diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index dec967e..5413641 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -43,7 +43,7 @@ bool Regex::isValid(std::string &Error) { size_t len = llvm_regerror(error, preg, NULL, 0); - Error.resize(len); + Error.resize(len - 1); llvm_regerror(error, preg, &Error[0], len); return false; } diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index f0fed77..dd417b4 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -202,8 +202,13 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) { } else if (CurArraySize != RHS.CurArraySize) { if (isSmall()) CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize); - else - CurArray = (const void**)realloc(CurArray, sizeof(void*)*RHS.CurArraySize); + else { + const void **T = (const void**)realloc(CurArray, + sizeof(void*) * RHS.CurArraySize); + if (!T) + free(CurArray); + CurArray = T; + } assert(CurArray && "Failed to allocate memory?"); } diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 51162dd..d4b94f8 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -211,7 +211,8 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, LineStr, ColRanges, FixIts); } -void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, +void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, + SourceMgr::DiagKind Kind, const Twine &Msg, ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts, bool ShowColors) const { SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts); @@ -222,8 +223,6 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, return; } - raw_ostream &OS = errs(); - if (Loc != SMLoc()) { int CurBuf = FindBufferContainingLoc(Loc); assert(CurBuf != -1 && "Invalid or unspecified location!"); @@ -233,6 +232,12 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, Diagnostic.print(0, OS, ShowColors); } +void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, + const Twine &Msg, ArrayRef<SMRange> Ranges, + ArrayRef<SMFixIt> FixIts, bool ShowColors) const { + PrintMessage(llvm::errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors); +} + //===----------------------------------------------------------------------===// // SMDiagnostic Implementation //===----------------------------------------------------------------------===// @@ -465,7 +470,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, if (FixItInsertionLine.empty()) return; - for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i != e; ++i) { + for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) { if (i >= LineContents.size() || LineContents[i] != '\t') { S << FixItInsertionLine[i]; ++OutCol; diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index d7a0bfa..bfae754 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -37,20 +37,39 @@ static bool ascii_isdigit(char x) { return x >= '0' && x <= '9'; } -/// compare_lower - Compare strings, ignoring case. -int StringRef::compare_lower(StringRef RHS) const { - for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { - unsigned char LHC = ascii_tolower(Data[I]); - unsigned char RHC = ascii_tolower(RHS.Data[I]); +// strncasecmp() is not available on non-POSIX systems, so define an +// alternative function here. +static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { + for (size_t I = 0; I < Length; ++I) { + unsigned char LHC = ascii_tolower(LHS[I]); + unsigned char RHC = ascii_tolower(RHS[I]); if (LHC != RHC) return LHC < RHC ? -1 : 1; } + return 0; +} +/// compare_lower - Compare strings, ignoring case. +int StringRef::compare_lower(StringRef RHS) const { + if (int Res = ascii_strncasecmp(Data, RHS.Data, min(Length, RHS.Length))) + return Res; if (Length == RHS.Length) return 0; return Length < RHS.Length ? -1 : 1; } +/// Check if this string starts with the given \p Prefix, ignoring case. +bool StringRef::startswith_lower(StringRef Prefix) const { + return Length >= Prefix.Length && + ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0; +} + +/// Check if this string ends with the given \p Suffix, ignoring case. +bool StringRef::endswith_lower(StringRef Suffix) const { + return Length >= Suffix.Length && + ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; +} + /// compare_numeric - Compare strings, handle embedded numbers. int StringRef::compare_numeric(StringRef RHS) const { for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { @@ -85,7 +104,7 @@ int StringRef::compare_numeric(StringRef RHS) const { // Compute the edit distance between the two given strings. unsigned StringRef::edit_distance(llvm::StringRef Other, bool AllowReplacements, - unsigned MaxEditDistance) { + unsigned MaxEditDistance) const { return llvm::ComputeEditDistance( llvm::ArrayRef<char>(data(), size()), llvm::ArrayRef<char>(Other.data(), Other.size()), diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index 9c81327..0c90c17 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -135,9 +135,9 @@ const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) { return TheTarget; } -static int TargetArraySortFn(const void *LHS, const void *RHS) { - typedef std::pair<StringRef, const Target*> pair_ty; - return ((const pair_ty*)LHS)->first.compare(((const pair_ty*)RHS)->first); +static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS, + const std::pair<StringRef, const Target *> *RHS) { + return LHS->first.compare(RHS->first); } void TargetRegistry::printRegisteredTargetsForVersion() { diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp index 0587aae..868b6ea 100644 --- a/lib/Support/ThreadLocal.cpp +++ b/lib/Support/ThreadLocal.cpp @@ -23,7 +23,7 @@ // Define all methods as no-ops if threading is explicitly disabled namespace llvm { using namespace sys; -ThreadLocalImpl::ThreadLocalImpl() { } +ThreadLocalImpl::ThreadLocalImpl() : data() { } ThreadLocalImpl::~ThreadLocalImpl() { } void ThreadLocalImpl::setInstance(const void* d) { typedef int SIZE_TOO_BIG[sizeof(d) <= sizeof(data) ? 1 : -1]; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index d0d0e14..6c978a0 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -221,7 +221,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("i386", "i486", "i586", "i686", Triple::x86) // FIXME: Do we need to support these? .Cases("i786", "i886", "i986", Triple::x86) - .Cases("amd64", "x86_64", Triple::x86_64) + .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64) .Case("powerpc", Triple::ppc) .Cases("powerpc64", "ppu", Triple::ppc64) .Case("powerpc64le", Triple::ppc64le) diff --git a/lib/Support/Unicode.cpp b/lib/Support/Unicode.cpp new file mode 100644 index 0000000..b719bd8 --- /dev/null +++ b/lib/Support/Unicode.cpp @@ -0,0 +1,367 @@ +//===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements functions that allow querying certain properties of +// Unicode characters. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Unicode.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/UnicodeCharRanges.h" + +namespace llvm { +namespace sys { +namespace unicode { + +bool isPrintable(int UCS) { + // Sorted list of non-overlapping intervals of code points that are not + // supposed to be printable. + static const UnicodeCharRange NonPrintableRanges[] = { + { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F }, + { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B }, + { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 }, + { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 }, + { 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF }, + { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D }, + { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C }, + { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F }, + { 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F }, + { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF }, + { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 }, + { 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 }, + { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB }, + { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 }, + { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 }, + { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E }, + { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 }, + { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B }, + { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A }, + { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D }, + { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 }, + { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 }, + { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB }, + { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF }, + { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 0x0B00 }, + { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 }, + { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 }, + { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A }, + { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E }, + { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 }, + { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 }, + { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 }, + { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD }, + { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF }, + { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 }, + { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 0x0C11 }, + { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C }, + { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 }, + { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 }, + { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 }, + { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 }, + { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 }, + { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD }, + { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 }, + { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D }, + { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 }, + { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F }, + { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 }, + { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 }, + { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 }, + { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 }, + { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E }, + { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 }, + { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 }, + { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 }, + { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC }, + { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 }, + { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB }, + { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 }, + { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD }, + { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC }, + { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 }, + { 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 }, + { 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F }, + { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF }, + { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 }, + { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C }, + { 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF }, + { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D }, + { 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F }, + { 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F }, + { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF }, + { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F }, + { 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF }, + { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F }, + { 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F }, + { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF }, + { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F }, + { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F }, + { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F }, + { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C }, + { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF }, + { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F }, + { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 }, + { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E }, + { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 }, + { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 }, + { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F }, + { 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 }, + { 0x208F, 0x208F }, { 0x209D, 0x209F }, { 0x20BB, 0x20CF }, + { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF }, + { 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 }, + { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F }, + { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 }, + { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E }, + { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 }, + { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF }, + { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 }, + { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A }, + { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF }, + { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 }, + { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F }, + { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F }, + { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF }, + { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F }, + { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F }, + { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F }, + { 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD }, + { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E }, + { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD }, + { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F }, + { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA }, + { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 }, + { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF }, + { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF }, + { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F }, + { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C }, + { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F }, + { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 }, + { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF }, + { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F }, + { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, { 0xFE6C, 0xFE6F }, + { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 }, + { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 }, + { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF }, + { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF }, + { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B }, + { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F }, + { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 }, + { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F }, + { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F }, + { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E }, + { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F }, + { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 }, + { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E }, + { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E }, + { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD }, + { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B }, + { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 }, + { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F }, + { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 }, + { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F }, + { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F }, + { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF }, + { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F }, + { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF }, + { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F }, + { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF }, + { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF }, + { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 }, + { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF }, + { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 }, + { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 }, + { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA }, + { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 }, + { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D }, + { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 }, + { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 }, + { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 }, + { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 }, + { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 }, + { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 }, + { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C }, + { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 }, + { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C }, + { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 }, + { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 }, + { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F }, + { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 }, + { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF }, + { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 }, + { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF }, + { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F }, + { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F }, + { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F }, + { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F }, + { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF }, + { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 }, + { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F }, + { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F }, + { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 0x2A6D7, 0x2A6FF }, + { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 }, + { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF } + }; + static const UnicodeCharSet NonPrintables(NonPrintableRanges); + + return UCS >= 0 && UCS <= 0x10FFFF && !NonPrintables.contains(UCS); +} + +/// Gets the number of positions a character is likely to occupy when output +/// on a terminal ("character width"). This depends on the implementation of the +/// terminal, and there's no standard definition of character width. +/// The implementation defines it in a way that is expected to be compatible +/// with a generic Unicode-capable terminal. +/// \return Character width: +/// * ErrorNonPrintableCharacter (-1) for non-printable characters (as +/// identified by isPrintable); +/// * 0 for non-spacing and enclosing combining marks; +/// * 2 for CJK characters excluding halfwidth forms; +/// * 1 for all remaining characters. +static inline int charWidth(int UCS) +{ + if (!isPrintable(UCS)) + return ErrorNonPrintableCharacter; + + // Sorted list of non-spacing and enclosing combining mark intervals as + // defined in "3.6 Combination" of + // http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf + static const UnicodeCharRange CombiningCharacterRanges[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD }, + { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, + { 0x05C7, 0x05C7 }, { 0x0610, 0x061A }, { 0x064B, 0x065F }, + { 0x0670, 0x0670 }, { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 }, + { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x0711, 0x0711 }, + { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, + { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, + { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08E4, 0x08FE }, + { 0x0900, 0x0902 }, { 0x093A, 0x093A }, { 0x093C, 0x093C }, + { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0957 }, + { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, + { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, + { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, + { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 }, + { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 }, + { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, + { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, + { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 }, + { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B62, 0x0B63 }, + { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, + { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, + { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D }, + { 0x0D62, 0x0D63 }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, + { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, + { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, + { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, + { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, + { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, + { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, + { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A }, + { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, + { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, + { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x135D, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, + { 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, + { 0x1A73, 0x1A7C }, { 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 }, + { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, + { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 }, + { 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAB }, + { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED }, + { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 }, + { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 }, + { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1DC0, 0x1DE6 }, + { 0x1DFC, 0x1DFF }, { 0x20D0, 0x20F0 }, { 0x2CEF, 0x2CF1 }, + { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D }, + { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D }, + { 0xA69F, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, + { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, + { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 }, { 0xA926, 0xA92D }, + { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, + { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC }, { 0xAA29, 0xAA2E }, + { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, + { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 }, + { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 }, + { 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 }, { 0xABE5, 0xABE5 }, + { 0xABE8, 0xABE8 }, { 0xABED, 0xABED }, { 0xFB1E, 0xFB1E }, + { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 }, { 0x101FD, 0x101FD }, + { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, + { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x11001, 0x11001 }, + { 0x11038, 0x11046 }, { 0x11080, 0x11081 }, { 0x110B3, 0x110B6 }, + { 0x110B9, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x1112B }, + { 0x1112D, 0x11134 }, { 0x11180, 0x11181 }, { 0x111B6, 0x111BE }, + { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD }, { 0x116B0, 0x116B5 }, + { 0x116B7, 0x116B7 }, { 0x16F8F, 0x16F92 }, { 0x1D167, 0x1D169 }, + { 0x1D17B, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, { 0xE0100, 0xE01EF }, + }; + static const UnicodeCharSet CombiningCharacters(CombiningCharacterRanges); + + if (CombiningCharacters.contains(UCS)) + return 0; + + static const UnicodeCharRange DoubleWidthCharacterRanges[] = { + // Hangul Jamo + { 0x1100, 0x11FF }, + // Deprecated fullwidth angle brackets + { 0x2329, 0x232A }, + // CJK Misc, CJK Unified Ideographs, Yijing Hexagrams, Yi + // excluding U+303F (IDEOGRAPHIC HALF FILL SPACE) + { 0x2E80, 0x303E }, { 0x3040, 0xA4CF }, + // Hangul + { 0xAC00, 0xD7A3 }, { 0xD7B0, 0xD7C6 }, { 0xD7CB, 0xD7FB }, + // CJK Unified Ideographs + { 0xF900, 0xFAFF }, + // Vertical forms + { 0xFE10, 0xFE19 }, + // CJK Compatibility Forms + Small Form Variants + { 0xFE30, 0xFE6F }, + // Fullwidth forms + { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 }, + // CJK Unified Ideographs + { 0x20000, 0x2A6DF }, { 0x2A700, 0x2B81F }, { 0x2F800, 0x2FA1F } + }; + static const UnicodeCharSet DoubleWidthCharacters(DoubleWidthCharacterRanges); + + if (DoubleWidthCharacters.contains(UCS)) + return 2; + return 1; +} + +int columnWidthUTF8(StringRef Text) { + unsigned ColumnWidth = 0; + unsigned Length; + for (size_t i = 0, e = Text.size(); i < e; i += Length) { + Length = getNumBytesForUTF8(Text[i]); + if (Length <= 0 || i + Length > Text.size()) + return ErrorInvalidUTF8; + UTF32 buf[1]; + const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i); + UTF32 *Target = &buf[0]; + if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target, + Target + 1, strictConversion)) + return ErrorInvalidUTF8; + int Width = charWidth(buf[0]); + if (Width < 0) + return ErrorNonPrintableCharacter; + ColumnWidth += Width; + } + return ColumnWidth; +} + +} // namespace unicode +} // namespace sys +} // namespace llvm + diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 4dcfa09..c9dc871 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -182,7 +182,7 @@ namespace sys { namespace fs { #if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \ - defined(__linux__) || defined(__CYGWIN__) + defined(__linux__) || defined(__CYGWIN__) || defined(__DragonFly__) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], const char *dir, const char *bin) @@ -251,7 +251,8 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) { return link_path; } #elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ - defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) + defined(__OpenBSD__) || defined(__minix) || defined(__DragonFly__) || \ + defined(__FreeBSD_kernel__) char exe_path[PATH_MAX]; if (getprogpath(exe_path, argv0) != NULL) @@ -298,6 +299,18 @@ UniqueID file_status::getUniqueID() const { } error_code current_path(SmallVectorImpl<char> &result) { + result.clear(); + + const char *pwd = ::getenv("PWD"); + llvm::sys::fs::file_status PWDStatus, DotStatus; + if (pwd && llvm::sys::path::is_absolute(pwd) && + !llvm::sys::fs::status(pwd, PWDStatus) && + !llvm::sys::fs::status(".", DotStatus) && + PWDStatus.getUniqueID() == DotStatus.getUniqueID()) { + result.append(pwd, pwd + strlen(pwd)); + return error_code::success(); + } + #ifdef MAXPATHLEN result.reserve(MAXPATHLEN); #else diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 0a797f6..c5778e7 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -13,6 +13,7 @@ #include "Unix.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/TimeValue.h" @@ -38,25 +39,6 @@ # include <termios.h> #endif -// See if we can use curses to detect information about a terminal when -// connected to one. -#ifdef HAVE_CURSES -# if defined(HAVE_CURSES_H) -# include <curses.h> -# elif defined(HAVE_NCURSES_H) -# include <ncurses.h> -# elif defined(HAVE_NCURSESW_H) -# include <ncursesw.h> -# elif defined(HAVE_NCURSES_CURSES_H) -# include <ncurses/curses.h> -# elif defined(HAVE_NCURSESW_CURSES_H) -# include <ncursesw/curses.h> -# else -# error Have a curses library but unable to find a curses header! -# endif -# include <term.h> -#endif - //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only generic UNIX code that //=== is guaranteed to work on *all* UNIX variants. @@ -107,13 +89,10 @@ TimeValue self_process::get_system_time() const { return getRUsageTimes().second; } +// On Cygwin, getpagesize() returns 64k(AllocationGranularity) and +// offset in mmap(3) should be aligned to the AllocationGranularity. static unsigned getPageSize() { -#if defined(__CYGWIN__) - // On Cygwin, getpagesize() returns 64k but the page size for the purposes of - // memory protection and mmap() is 4k. - // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492 - const int page_size = 0x1000; -#elif defined(HAVE_GETPAGESIZE) +#if defined(HAVE_GETPAGESIZE) const int page_size = ::getpagesize(); #elif defined(HAVE_SYSCONF) long page_size = ::sysconf(_SC_PAGE_SIZE); @@ -159,14 +138,6 @@ void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time, llvm::tie(user_time, sys_time) = getRUsageTimes(); } -int Process::GetCurrentUserId() { - return getuid(); -} - -int Process::GetCurrentGroupId() { - return getgid(); -} - #if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) #include <mach/mach.h> #endif @@ -211,6 +182,22 @@ void Process::PreventCoreFiles() { #endif } +Optional<std::string> Process::GetEnv(StringRef Name) { + std::string NameStr = Name.str(); + const char *Val = ::getenv(NameStr.c_str()); + if (!Val) + return None; + return std::string(Val); +} + +error_code Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut, + ArrayRef<const char *> ArgsIn, + SpecificBumpPtrAllocator<char> &) { + ArgsOut.append(ArgsIn.begin(), ArgsIn.end()); + + return error_code::success(); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(STDIN_FILENO); } @@ -266,21 +253,50 @@ unsigned Process::StandardErrColumns() { return getColumns(2); } +#ifdef HAVE_TERMINFO +// We manually declare these extern functions because finding the correct +// headers from various terminfo, curses, or other sources is harder than +// writing their specs down. +extern "C" int setupterm(char *term, int filedes, int *errret); +extern "C" struct term *set_curterm(struct term *termp); +extern "C" int del_curterm(struct term *termp); +extern "C" int tigetnum(char *capname); +#endif + static bool terminalHasColors(int fd) { -#ifdef HAVE_CURSES - // First, acquire a global lock because the curses C routines are thread - // hostile. +#ifdef HAVE_TERMINFO + // First, acquire a global lock because these C routines are thread hostile. static sys::Mutex M; MutexGuard G(M); int errret = 0; - if (setupterm((char *)0, fd, &errret) != OK) + if (setupterm((char *)0, fd, &errret) != 0) // Regardless of why, if we can't get terminfo, we shouldn't try to print // colors. return false; - // Test whether the terminal as set up supports color output. - if (has_colors() == TRUE) + // Test whether the terminal as set up supports color output. How to do this + // isn't entirely obvious. We can use the curses routine 'has_colors' but it + // would be nice to avoid a dependency on curses proper when we can make do + // with a minimal terminfo parsing library. Also, we don't really care whether + // the terminal supports the curses-specific color changing routines, merely + // if it will interpret ANSI color escape codes in a reasonable way. Thus, the + // strategy here is just to query the baseline colors capability and if it + // supports colors at all to assume it will translate the escape codes into + // whatever range of colors it does support. We can add more detailed tests + // here if users report them as necessary. + // + // The 'tigetnum' routine returns -2 or -1 on errors, and might return 0 if + // the terminfo says that no colors are supported. + bool HasColors = tigetnum(const_cast<char *>("colors")) > 0; + + // Now extract the structure allocated by setupterm and free its memory + // through a really silly dance. + struct term *termp = set_curterm((struct term *)0); + (void)del_curterm(termp); // Drop any errors here. + + // Return true if we found a color capabilities for the current terminal. + if (HasColors) return true; #endif @@ -302,29 +318,15 @@ bool Process::StandardErrHasColors() { return FileDescriptorHasColors(STDERR_FILENO); } +void Process::UseANSIEscapeCodes(bool /*enable*/) { + // No effect. +} + bool Process::ColorNeedsFlush() { // No, we use ANSI escape sequences. return false; } -#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m" - -#define ALLCOLORS(FGBG,BOLD) {\ - COLOR(FGBG, "0", BOLD),\ - COLOR(FGBG, "1", BOLD),\ - COLOR(FGBG, "2", BOLD),\ - COLOR(FGBG, "3", BOLD),\ - COLOR(FGBG, "4", BOLD),\ - COLOR(FGBG, "5", BOLD),\ - COLOR(FGBG, "6", BOLD),\ - COLOR(FGBG, "7", BOLD)\ - } - -static const char colorcodes[2][2][8][10] = { - { ALLCOLORS("3",""), ALLCOLORS("3","1;") }, - { ALLCOLORS("4",""), ALLCOLORS("4","1;") } -}; - const char *Process::OutputColor(char code, bool bold, bool bg) { return colorcodes[bg?1:0][bold?1:0][code&7]; } diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index a93a912..78b2971 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -36,6 +36,9 @@ #include <unistd.h> #endif #ifdef HAVE_POSIX_SPAWN +#ifdef __sun__ +#define _RESTRICT_KYWD +#endif #include <spawn.h> #if !defined(__APPLE__) extern char **environ; @@ -47,6 +50,8 @@ namespace llvm { using namespace sys; +ProcessInfo::ProcessInfo() : Pid(0), ReturnCode(0) {} + // This function just uses the PATH environment variable to find the program. std::string sys::FindProgramByName(const std::string& progName) { @@ -175,9 +180,16 @@ static void SetMemoryLimits (unsigned size) } -static bool Execute(void **Data, StringRef Program, const char **args, +static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, const char **envp, const StringRef **redirects, unsigned memoryLimit, std::string *ErrMsg) { + if (!llvm::sys::fs::exists(Program)) { + if (ErrMsg) + *ErrMsg = std::string("Executable \"") + Program.str() + + std::string("\" doesn't exist!"); + return false; + } + // If this OS has posix_spawn and there is no memory limit being implied, use // posix_spawn. It is more efficient than fork/exec. #ifdef HAVE_POSIX_SPAWN @@ -239,8 +251,8 @@ static bool Execute(void **Data, StringRef Program, const char **args, if (Err) return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err); - if (Data) - *Data = reinterpret_cast<void*>(PID); + PI.Pid = PID; + return true; } #endif @@ -303,56 +315,71 @@ static bool Execute(void **Data, StringRef Program, const char **args, break; } - if (Data) - *Data = reinterpret_cast<void*>(child); + PI.Pid = child; return true; } -static int Wait(void *&Data, StringRef Program, unsigned secondsToWait, - std::string *ErrMsg) { +namespace llvm { + +ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, + bool WaitUntilTerminates, std::string *ErrMsg) { #ifdef HAVE_SYS_WAIT_H struct sigaction Act, Old; - assert(Data && "invalid pid to wait on, process not started?"); - - // Install a timeout handler. The handler itself does nothing, but the simple - // fact of having a handler at all causes the wait below to return with EINTR, - // unlike if we used SIG_IGN. - if (secondsToWait) { + assert(PI.Pid && "invalid pid to wait on, process not started?"); + + int WaitPidOptions = 0; + pid_t ChildPid = PI.Pid; + if (WaitUntilTerminates) { + SecondsToWait = 0; + ChildPid = -1; // mimic a wait() using waitpid() + } else if (SecondsToWait) { + // Install a timeout handler. The handler itself does nothing, but the + // simple fact of having a handler at all causes the wait below to return + // with EINTR, unlike if we used SIG_IGN. memset(&Act, 0, sizeof(Act)); Act.sa_handler = TimeOutHandler; sigemptyset(&Act.sa_mask); sigaction(SIGALRM, &Act, &Old); - alarm(secondsToWait); - } + alarm(SecondsToWait); + } else if (SecondsToWait == 0) + WaitPidOptions = WNOHANG; // Parent process: Wait for the child process to terminate. int status; - uint64_t pid = reinterpret_cast<uint64_t>(Data); - pid_t child = static_cast<pid_t>(pid); - while (waitpid(pid, &status, 0) != child) - if (secondsToWait && errno == EINTR) { - // Kill the child. - kill(child, SIGKILL); - - // Turn off the alarm and restore the signal handler - alarm(0); - sigaction(SIGALRM, &Old, 0); - - // Wait for child to die - if (wait(&status) != child) - MakeErrMsg(ErrMsg, "Child timed out but wouldn't die"); - else - MakeErrMsg(ErrMsg, "Child timed out", 0); - - return -2; // Timeout detected - } else if (errno != EINTR) { - MakeErrMsg(ErrMsg, "Error waiting for child process"); - return -1; + ProcessInfo WaitResult; + WaitResult.Pid = waitpid(ChildPid, &status, WaitPidOptions); + if (WaitResult.Pid != PI.Pid) { + if (WaitResult.Pid == 0) { + // Non-blocking wait. + return WaitResult; + } else { + if (SecondsToWait && errno == EINTR) { + // Kill the child. + kill(PI.Pid, SIGKILL); + + // Turn off the alarm and restore the signal handler + alarm(0); + sigaction(SIGALRM, &Old, 0); + + // Wait for child to die + if (wait(&status) != ChildPid) + MakeErrMsg(ErrMsg, "Child timed out but wouldn't die"); + else + MakeErrMsg(ErrMsg, "Child timed out", 0); + + WaitResult.ReturnCode = -2; // Timeout detected + return WaitResult; + } else if (errno != EINTR) { + MakeErrMsg(ErrMsg, "Error waiting for child process"); + WaitResult.ReturnCode = -1; + return WaitResult; + } } + } // We exited normally without timeout, so turn off the timer. - if (secondsToWait) { + if (SecondsToWait && !WaitUntilTerminates) { alarm(0); sigaction(SIGALRM, &Old, 0); } @@ -362,24 +389,19 @@ static int Wait(void *&Data, StringRef Program, unsigned secondsToWait, int result = 0; if (WIFEXITED(status)) { result = WEXITSTATUS(status); -#ifdef HAVE_POSIX_SPAWN - // The posix_spawn child process returns 127 on any kind of error. - // Following the POSIX convention for command-line tools (which posix_spawn - // itself apparently does not), check to see if the failure was due to some - // reason other than the file not existing, and return 126 in this case. - bool Exists; - if (result == 127 && !llvm::sys::fs::exists(Program, Exists) && Exists) - result = 126; -#endif + WaitResult.ReturnCode = result; + if (result == 127) { if (ErrMsg) *ErrMsg = llvm::sys::StrError(ENOENT); - return -1; + WaitResult.ReturnCode = -1; + return WaitResult; } if (result == 126) { if (ErrMsg) *ErrMsg = "Program could not be executed"; - return -1; + WaitResult.ReturnCode = -1; + return WaitResult; } } else if (WIFSIGNALED(status)) { if (ErrMsg) { @@ -391,18 +413,16 @@ static int Wait(void *&Data, StringRef Program, unsigned secondsToWait, } // Return a special value to indicate that the process received an unhandled // signal during execution as opposed to failing to execute. - return -2; + WaitResult.ReturnCode = -2; } - return result; #else if (ErrMsg) *ErrMsg = "Program::Wait is not implemented on this platform yet!"; - return -1; + WaitResult.ReturnCode = -2; #endif + return WaitResult; } -namespace llvm { - error_code sys::ChangeStdinToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. return make_error_code(errc::success); @@ -438,5 +458,4 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { } return true; } - } diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 800a6a7..13ae862 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -333,7 +333,7 @@ static void PrintStackTraceSignalHandler(void *) { void llvm::sys::PrintStackTraceOnErrorSignal() { AddSignalHandler(PrintStackTraceSignalHandler, 0); -#if defined(__APPLE__) && !defined(ANDROID) +#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES) // Environment variable to disable any kind of crash dialog. if (getenv("LLVM_DISABLE_CRASH_REPORT")) { mach_port_t self = mach_task_self(); @@ -359,7 +359,7 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { // the same linkage unit by just defining our own versions of the assert handler // and abort. -#if defined(__APPLE__) && !defined(ANDROID) +#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES) #include <signal.h> #include <pthread.h> diff --git a/lib/Support/Unix/ThreadLocal.inc b/lib/Support/Unix/ThreadLocal.inc index 2b4c901..f14d0fa 100644 --- a/lib/Support/Unix/ThreadLocal.inc +++ b/lib/Support/Unix/ThreadLocal.inc @@ -18,7 +18,7 @@ namespace llvm { using namespace sys; -ThreadLocalImpl::ThreadLocalImpl() { } +ThreadLocalImpl::ThreadLocalImpl() : data() { } ThreadLocalImpl::~ThreadLocalImpl() { } void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);} const void* ThreadLocalImpl::getInstance() { return data; } diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h index dd11c04..ba688e3 100644 --- a/lib/Support/Unix/Unix.h +++ b/lib/Support/Unix/Unix.h @@ -47,6 +47,10 @@ # include <sys/wait.h> #endif +#ifdef HAVE_DLFCN_H +# include <dlfcn.h> +#endif + #ifndef WEXITSTATUS # define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) #endif diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 83da82a..5a7b219 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -71,7 +71,7 @@ extern "C" { DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, std::string *errMsg) { - SmartScopedLock<true> lock(getMutex()); + SmartScopedLock<true> lock(*SymbolsMutex); if (!filename) { // When no file is specified, enumerate all DLLs and EXEs in the process. @@ -83,8 +83,15 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, // This is mostly to ensure that the return value still shows up as "valid". return DynamicLibrary(&OpenedHandles); } + + SmallVector<wchar_t, MAX_PATH> filenameUnicode; + if (error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { + SetLastError(ec.value()); + MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: "); + return DynamicLibrary(); + } - HMODULE a_handle = LoadLibrary(filename); + HMODULE a_handle = LoadLibraryW(filenameUnicode.data()); if (a_handle == 0) { MakeErrMsg(errMsg, std::string(filename) + ": Can't open : "); @@ -114,10 +121,10 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, #undef EXPLICIT_SYMBOL2 void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { - SmartScopedLock<true> Lock(getMutex()); + SmartScopedLock<true> Lock(*SymbolsMutex); // First check symbols added via AddSymbol(). - if (ExplicitSymbols) { + if (ExplicitSymbols.isConstructed()) { StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName); if (i != ExplicitSymbols->end()) diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc index 4c5aebd..1260452 100644 --- a/lib/Support/Windows/Memory.inc +++ b/lib/Support/Windows/Memory.inc @@ -82,7 +82,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) + NearBlock->size() - : NULL; + : 0; // If the requested address is not aligned to the allocation granularity, // round up to get beyond NearBlock. VirtualAlloc would have rounded down. @@ -106,7 +106,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, MemoryBlock Result; Result.Address = PA; Result.Size = NumBlocks*Granularity; - ; + if (Flags & MF_EXEC) Memory::InvalidateInstructionCache(Result.Address, Result.Size); diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 1694cb2..0b39198 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -37,70 +37,18 @@ typedef int errno_t; using namespace llvm; +using llvm::sys::windows::UTF8ToUTF16; +using llvm::sys::windows::UTF16ToUTF8; + namespace { typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)( /*__in*/ LPCWSTR lpSymlinkFileName, /*__in*/ LPCWSTR lpTargetFileName, /*__in*/ DWORD dwFlags); - PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW( - ::GetProcAddress(::GetModuleHandleA("kernel32.dll"), - "CreateSymbolicLinkW")); - - error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) { - int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, - utf8.begin(), utf8.size(), - utf16.begin(), 0); - - if (len == 0) - return windows_error(::GetLastError()); - - utf16.reserve(len + 1); - utf16.set_size(len); - - len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, - utf8.begin(), utf8.size(), - utf16.begin(), utf16.size()); - - if (len == 0) - return windows_error(::GetLastError()); - - // Make utf16 null terminated. - utf16.push_back(0); - utf16.pop_back(); - - return error_code::success(); - } - - error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, - SmallVectorImpl<char> &utf8) { - // Get length. - int len = ::WideCharToMultiByte(CP_UTF8, 0, - utf16, utf16_len, - utf8.begin(), 0, - NULL, NULL); - - if (len == 0) - return windows_error(::GetLastError()); - - utf8.reserve(len); - utf8.set_size(len); - - // Now do the actual conversion. - len = ::WideCharToMultiByte(CP_UTF8, 0, - utf16, utf16_len, - utf8.data(), utf8.size(), - NULL, NULL); - - if (len == 0) - return windows_error(::GetLastError()); - - // Make utf8 null terminated. - utf8.push_back(0); - utf8.pop_back(); - - return error_code::success(); - } + PtrCreateSymbolicLinkW create_symbolic_link_api = + PtrCreateSymbolicLinkW(::GetProcAddress( + ::GetModuleHandleW(L"Kernel32.dll"), "CreateSymbolicLinkW")); error_code TempDir(SmallVectorImpl<wchar_t> &result) { retry_temp_dir: @@ -180,7 +128,7 @@ retry_random_path: BYTE val = 0; if (!::CryptGenRandom(CryptoProvider, 1, &val)) return windows_error(::GetLastError()); - random_path_utf16.push_back("0123456789abcdef"[val & 15]); + random_path_utf16.push_back(L"0123456789abcdef"[val & 15]); } else random_path_utf16.push_back(*i); @@ -268,9 +216,28 @@ namespace sys { namespace fs { std::string getMainExecutable(const char *argv0, void *MainExecAddr) { - char pathname[MAX_PATH]; - DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH); - return ret != MAX_PATH ? pathname : ""; + SmallVector<wchar_t, MAX_PATH> PathName; + DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.capacity()); + + // A zero return value indicates a failure other than insufficient space. + if (Size == 0) + return ""; + + // Insufficient space is determined by a return value equal to the size of + // the buffer passed in. + if (Size == PathName.capacity()) + return ""; + + // On success, GetModuleFileNameW returns the number of characters written to + // the buffer not including the NULL terminator. + PathName.set_size(Size); + + // Convert the result from UTF-16 to UTF-8. + SmallVector<char, MAX_PATH> PathNameUTF8; + if (UTF16ToUTF8(PathName.data(), PathName.size(), PathNameUTF8)) + return ""; + + return std::string(PathNameUTF8.data()); } UniqueID file_status::getUniqueID() const { @@ -293,47 +260,25 @@ TimeValue file_status::getLastModificationTime() const { } error_code current_path(SmallVectorImpl<char> &result) { - SmallVector<wchar_t, 128> cur_path; - cur_path.reserve(128); -retry_cur_dir: - DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data()); - - // A zero return value indicates a failure other than insufficient space. - if (len == 0) - return windows_error(::GetLastError()); + SmallVector<wchar_t, MAX_PATH> cur_path; + DWORD len = MAX_PATH; - // If there's insufficient space, the len returned is larger than the len - // given. - if (len > cur_path.capacity()) { + do { cur_path.reserve(len); - goto retry_cur_dir; - } - - cur_path.set_size(len); - // cur_path now holds the current directory in utf-16. Convert to utf-8. + len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data()); - // Find out how much space we need. Sadly, this function doesn't return the - // size needed unless you tell it the result size is 0, which means you - // _always_ have to call it twice. - len = ::WideCharToMultiByte(CP_UTF8, 0, - cur_path.data(), cur_path.size(), - result.data(), 0, - NULL, NULL); - - if (len == 0) - return make_error_code(windows_error(::GetLastError())); + // A zero return value indicates a failure other than insufficient space. + if (len == 0) + return windows_error(::GetLastError()); - result.reserve(len); - result.set_size(len); - // Now do the actual conversion. - len = ::WideCharToMultiByte(CP_UTF8, 0, - cur_path.data(), cur_path.size(), - result.data(), result.size(), - NULL, NULL); - if (len == 0) - return windows_error(::GetLastError()); + // If there's insufficient space, the len returned is larger than the len + // given. + } while (len > cur_path.capacity()); - return error_code::success(); + // On success, GetCurrentDirectoryW returns the number of characters not + // including the null-terminator. + cur_path.set_size(len); + return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result); } error_code create_directory(const Twine &path, bool &existed) { @@ -746,12 +691,11 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { case priv: flprotect = PAGE_WRITECOPY; break; } - FileMappingHandle = ::CreateFileMapping(FileHandle, - 0, - flprotect, - Size >> 32, - Size & 0xffffffff, - 0); + FileMappingHandle = + ::CreateFileMappingW(FileHandle, 0, flprotect, + (Offset + Size) >> 32, + (Offset + Size) & 0xffffffff, + 0); if (FileMappingHandle == NULL) { error_code ec = windows_error(GetLastError()); if (FileDescriptor) { @@ -816,7 +760,7 @@ mapped_file_region::mapped_file_region(const Twine &path, mapmode mode, uint64_t length, uint64_t offset, - error_code &ec) + error_code &ec) : Mode(mode) , Size(length) , Mapping() @@ -1018,7 +962,7 @@ error_code detail::directory_iterator_increment(detail::DirIterState &it) { return error_code::success(); } -error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, +error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, bool map_writable, void *&result) { assert(0 && "NOT IMPLEMENTED"); return windows_error::invalid_function; @@ -1078,7 +1022,7 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD, DWORD CreationDisposition; if (Flags & F_Excl) CreationDisposition = CREATE_NEW; - else if (Flags & F_Append) + else if (Flags & F_Append) CreationDisposition = OPEN_ALWAYS; else CreationDisposition = CREATE_ALWAYS; @@ -1115,7 +1059,64 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD, ResultFD = FD; return error_code::success(); } - } // end namespace fs + +namespace windows { +llvm::error_code UTF8ToUTF16(llvm::StringRef utf8, + llvm::SmallVectorImpl<wchar_t> &utf16) { + int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8.begin(), utf8.size(), + utf16.begin(), 0); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + utf16.reserve(len + 1); + utf16.set_size(len); + + len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8.begin(), utf8.size(), + utf16.begin(), utf16.size()); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + // Make utf16 null terminated. + utf16.push_back(0); + utf16.pop_back(); + + return llvm::error_code::success(); +} + +llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + llvm::SmallVectorImpl<char> &utf8) { + // Get length. + int len = ::WideCharToMultiByte(CP_UTF8, 0, + utf16, utf16_len, + utf8.begin(), 0, + NULL, NULL); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + utf8.reserve(len); + utf8.set_size(len); + + // Now do the actual conversion. + len = ::WideCharToMultiByte(CP_UTF8, 0, + utf16, utf16_len, + utf8.data(), utf8.size(), + NULL, NULL); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + // Make utf8 null terminated. + utf8.push_back(0); + utf8.pop_back(); + + return llvm::error_code::success(); +} +} // end namespace windows } // end namespace sys } // end namespace llvm diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index 359b99f..f9a3db9 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -11,18 +11,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Allocator.h" + #include "Windows.h" #include <direct.h> #include <io.h> #include <malloc.h> #include <psapi.h> +#include <shellapi.h> #ifdef __MINGW32__ #if (HAVE_LIBPSAPI != 1) #error "libpsapi.a should be present" #endif + #if (HAVE_LIBSHELL32 != 1) + #error "libshell32.a should be present" + #endif #else #pragma comment(lib, "psapi.lib") + #pragma comment(lib, "shell32.lib") #endif //===----------------------------------------------------------------------===// @@ -83,6 +90,8 @@ static unsigned getPageSize() { // that LLVM ought to run as 64-bits on a 64-bit system, anyway. SYSTEM_INFO info; GetSystemInfo(&info); + // FIXME: FileOffset in MapViewOfFile() should be aligned to not dwPageSize, + // but dwAllocationGranularity. return static_cast<unsigned>(info.dwPageSize); } @@ -119,28 +128,89 @@ void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time, sys_time = getTimeValueFromFILETIME(KernelTime); } -int Process::GetCurrentUserId() -{ - return 65536; -} - -int Process::GetCurrentGroupId() -{ - return 65536; -} - // Some LLVM programs such as bugpoint produce core files as a normal part of -// their operation. To prevent the disk from filling up, this configuration item -// does what's necessary to prevent their generation. +// their operation. To prevent the disk from filling up, this configuration +// item does what's necessary to prevent their generation. void Process::PreventCoreFiles() { - // Windows doesn't do core files, but it does do modal pop-up message - // boxes. As this method is used by bugpoint, preventing these pop-ups - // is the moral equivalent of suppressing core files. + // Windows does have the concept of core files, called minidumps. However, + // disabling minidumps for a particular application extends past the lifetime + // of that application, which is the incorrect behavior for this API. + // Additionally, the APIs require elevated privileges to disable and re- + // enable minidumps, which makes this untenable. For more information, see + // WerAddExcludedApplication and WerRemoveExcludedApplication (Vista and + // later). + // + // Windows also has modal pop-up message boxes. As this method is used by + // bugpoint, preventing these pop-ups is additionally important. SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX); } +/// Returns the environment variable \arg Name's value as a string encoded in +/// UTF-8. \arg Name is assumed to be in UTF-8 encoding. +Optional<std::string> Process::GetEnv(StringRef Name) { + // Convert the argument to UTF-16 to pass it to _wgetenv(). + SmallVector<wchar_t, 128> NameUTF16; + if (error_code ec = windows::UTF8ToUTF16(Name, NameUTF16)) + return None; + + // Environment variable can be encoded in non-UTF8 encoding, and there's no + // way to know what the encoding is. The only reliable way to look up + // multibyte environment variable is to use GetEnvironmentVariableW(). + SmallVector<wchar_t, MAX_PATH> Buf; + size_t Size = MAX_PATH; + do { + Buf.reserve(Size); + Size = + GetEnvironmentVariableW(NameUTF16.data(), Buf.data(), Buf.capacity()); + if (Size == 0) + return None; + + // Try again with larger buffer. + } while (Size > Buf.capacity()); + Buf.set_size(Size); + + // Convert the result from UTF-16 to UTF-8. + SmallVector<char, MAX_PATH> Res; + if (error_code ec = windows::UTF16ToUTF8(Buf.data(), Size, Res)) + return None; + return std::string(Res.data()); +} + +error_code +Process::GetArgumentVector(SmallVectorImpl<const char *> &Args, + ArrayRef<const char *>, + SpecificBumpPtrAllocator<char> &ArgAllocator) { + int NewArgCount; + error_code ec; + + wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(), + &NewArgCount); + if (!UnicodeCommandLine) + return windows_error(::GetLastError()); + + Args.reserve(NewArgCount); + + for (int i = 0; i < NewArgCount; ++i) { + SmallVector<char, MAX_PATH> NewArgString; + ec = windows::UTF16ToUTF8(UnicodeCommandLine[i], + wcslen(UnicodeCommandLine[i]), + NewArgString); + if (ec) + break; + + char *Buffer = ArgAllocator.Allocate(NewArgString.size() + 1); + ::memcpy(Buffer, NewArgString.data(), NewArgString.size() + 1); + Args.push_back(Buffer); + } + LocalFree(UnicodeCommandLine); + if (ec) + return ec; + + return error_code::success(); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(0); } @@ -187,6 +257,11 @@ bool Process::StandardErrHasColors() { return FileDescriptorHasColors(2); } +static bool UseANSI = false; +void Process::UseANSIEscapeCodes(bool enable) { + UseANSI = enable; +} + namespace { class DefaultColors { @@ -208,10 +283,12 @@ DefaultColors defaultColors; } bool Process::ColorNeedsFlush() { - return true; + return !UseANSI; } const char *Process::OutputBold(bool bg) { + if (UseANSI) return "\033[1m"; + WORD colors = DefaultColors::GetCurrentColor(); if (bg) colors |= BACKGROUND_INTENSITY; @@ -222,6 +299,8 @@ const char *Process::OutputBold(bool bg) { } const char *Process::OutputColor(char code, bool bold, bool bg) { + if (UseANSI) return colorcodes[bg?1:0][bold?1:0][code&7]; + WORD colors; if (bg) { colors = ((code&1) ? BACKGROUND_RED : 0) | @@ -247,6 +326,8 @@ static WORD GetConsoleTextAttribute(HANDLE hConsoleOutput) { } const char *Process::OutputReverse() { + if (UseANSI) return "\033[7m"; + const WORD attributes = GetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE)); @@ -273,6 +354,7 @@ const char *Process::OutputReverse() { } const char *Process::ResetColor() { + if (UseANSI) return "\033[0m"; SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors()); return 0; } diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc index 8165ef4..dc09738 100644 --- a/lib/Support/Windows/Program.inc +++ b/lib/Support/Windows/Program.inc @@ -24,16 +24,11 @@ //=== and must not be UNIX code //===----------------------------------------------------------------------===// -namespace { - struct Win32ProcessInfo { - HANDLE hProcess; - DWORD dwProcessId; - }; -} - namespace llvm { using namespace sys; +ProcessInfo::ProcessInfo() : ProcessHandle(0), Pid(0), ReturnCode(0) {} + // This function just uses the PATH environment variable to find the program. std::string sys::FindProgramByName(const std::string &progName) { // Check some degenerate cases @@ -47,42 +42,39 @@ std::string sys::FindProgramByName(const std::string &progName) { // At this point, the file name is valid and does not contain slashes. // Let Windows search for it. - std::string buffer; - buffer.resize(MAX_PATH); - char *dummy = NULL; - DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH, - &buffer[0], &dummy); - - // See if it wasn't found. - if (len == 0) + SmallVector<wchar_t, MAX_PATH> progNameUnicode; + if (windows::UTF8ToUTF16(progName, progNameUnicode)) return ""; - // See if we got the entire path. - if (len < MAX_PATH) - return buffer; + SmallVector<wchar_t, MAX_PATH> buffer; + DWORD len = MAX_PATH; + do { + buffer.reserve(len); + len = ::SearchPathW(NULL, progNameUnicode.data(), L".exe", + buffer.capacity(), buffer.data(), NULL); - // Buffer was too small; grow and retry. - while (true) { - buffer.resize(len+1); - DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, &buffer[0], &dummy); - - // It is unlikely the search failed, but it's always possible some file - // was added or removed since the last search, so be paranoid... - if (len2 == 0) + // See if it wasn't found. + if (len == 0) return ""; - else if (len2 <= len) - return buffer; - len = len2; - } + // Buffer was too small; grow and retry. + } while (len > buffer.capacity()); + + buffer.set_size(len); + SmallVector<char, MAX_PATH> result; + if (windows::UTF16ToUTF8(buffer.begin(), buffer.size(), result)) + return ""; + + return std::string(result.data(), result.size()); } static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) { HANDLE h; if (path == 0) { - DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd), - GetCurrentProcess(), &h, - 0, TRUE, DUPLICATE_SAME_ACCESS); + if (!DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd), + GetCurrentProcess(), &h, + 0, TRUE, DUPLICATE_SAME_ACCESS)) + return INVALID_HANDLE_VALUE; return h; } @@ -97,9 +89,13 @@ static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) { sa.lpSecurityDescriptor = 0; sa.bInheritHandle = TRUE; - h = CreateFile(fname.c_str(), fd ? GENERIC_WRITE : GENERIC_READ, - FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS, - FILE_ATTRIBUTE_NORMAL, NULL); + SmallVector<wchar_t, 128> fnameUnicode; + if (windows::UTF8ToUTF16(fname, fnameUnicode)) + return INVALID_HANDLE_VALUE; + + h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ, + FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); if (h == INVALID_HANDLE_VALUE) { MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " + (fd ? "input: " : "output: ")); @@ -171,13 +167,9 @@ static unsigned int ArgLenWithQuotes(const char *Str) { } -static bool Execute(void **Data, - StringRef Program, - const char** args, - const char** envp, - const StringRef** redirects, - unsigned memoryLimit, - std::string* ErrMsg) { +static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, + const char **envp, const StringRef **redirects, + unsigned memoryLimit, std::string *ErrMsg) { if (!sys::fs::can_execute(Program)) { if (ErrMsg) *ErrMsg = "program not executable"; @@ -227,34 +219,28 @@ static bool Execute(void **Data, *p = 0; // The pointer to the environment block for the new process. - OwningArrayPtr<char> envblock; + std::vector<wchar_t> EnvBlock; if (envp) { // An environment block consists of a null-terminated block of // null-terminated strings. Convert the array of environment variables to // an environment block by concatenating them. + for (unsigned i = 0; envp[i]; ++i) { + SmallVector<wchar_t, MAX_PATH> EnvString; + if (error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16"); + return false; + } - // First, determine the length of the environment block. - len = 0; - for (unsigned i = 0; envp[i]; i++) - len += strlen(envp[i]) + 1; - - // Now build the environment block. - envblock.reset(new char[len+1]); - p = envblock.get(); - - for (unsigned i = 0; envp[i]; i++) { - const char *ev = envp[i]; - size_t len = strlen(ev) + 1; - memcpy(p, ev, len); - p += len; + EnvBlock.insert(EnvBlock.end(), EnvString.begin(), EnvString.end()); + EnvBlock.push_back(0); } - - *p = 0; + EnvBlock.push_back(0); } // Create a child process. - STARTUPINFO si; + STARTUPINFOW si; memset(&si, 0, sizeof(si)); si.cb = sizeof(si); si.hStdInput = INVALID_HANDLE_VALUE; @@ -278,9 +264,14 @@ static bool Execute(void **Data, if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) { // If stdout and stderr should go to the same place, redirect stderr // to the handle already open for stdout. - DuplicateHandle(GetCurrentProcess(), si.hStdOutput, - GetCurrentProcess(), &si.hStdError, - 0, TRUE, DUPLICATE_SAME_ACCESS); + if (!DuplicateHandle(GetCurrentProcess(), si.hStdOutput, + GetCurrentProcess(), &si.hStdError, + 0, TRUE, DUPLICATE_SAME_ACCESS)) { + CloseHandle(si.hStdInput); + CloseHandle(si.hStdOutput); + MakeErrMsg(ErrMsg, "can't dup stderr to stdout"); + return false; + } } else { // Just redirect stderr si.hStdError = RedirectIO(redirects[2], 2, ErrMsg); @@ -298,9 +289,27 @@ static bool Execute(void **Data, fflush(stdout); fflush(stderr); - std::string ProgramStr = Program; - BOOL rc = CreateProcess(ProgramStr.c_str(), command.get(), NULL, NULL, TRUE, - 0, envblock.get(), NULL, &si, &pi); + + SmallVector<wchar_t, MAX_PATH> ProgramUtf16; + if (error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, + std::string("Unable to convert application name to UTF-16")); + return false; + } + + SmallVector<wchar_t, MAX_PATH> CommandUtf16; + if (error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, + std::string("Unable to convert command-line to UTF-16")); + return false; + } + + BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, + TRUE, CREATE_UNICODE_ENVIRONMENT, + EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si, + &pi); DWORD err = GetLastError(); // Regardless of whether the process got created or not, we are done with @@ -313,15 +322,12 @@ static bool Execute(void **Data, if (!rc) { SetLastError(err); MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") + - ProgramStr + "'"); + Program.str() + "'"); return false; } - if (Data) { - Win32ProcessInfo* wpi = new Win32ProcessInfo; - wpi->hProcess = pi.hProcess; - wpi->dwProcessId = pi.dwProcessId; - *Data = wpi; - } + + PI.Pid = pi.dwProcessId; + PI.ProcessHandle = pi.hProcess; // Make sure these get closed no matter what. ScopedCommonHandle hThread(pi.hThread); @@ -329,7 +335,7 @@ static bool Execute(void **Data, // Assign the process to a job if a memory limit is defined. ScopedJobHandle hJob; if (memoryLimit != 0) { - hJob = CreateJobObject(0, 0); + hJob = CreateJobObjectW(0, 0); bool success = false; if (hJob) { JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli; @@ -351,68 +357,72 @@ static bool Execute(void **Data, } } - // Don't leak the handle if the caller doesn't want it. - if (!Data) - CloseHandle(pi.hProcess); - return true; } -static int WaitAux(Win32ProcessInfo *wpi, unsigned secondsToWait, - std::string *ErrMsg) { - // Wait for the process to terminate. - HANDLE hProcess = wpi->hProcess; - DWORD millisecondsToWait = INFINITE; - if (secondsToWait > 0) - millisecondsToWait = secondsToWait * 1000; - - if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) { - if (!TerminateProcess(hProcess, 1)) { - MakeErrMsg(ErrMsg, "Failed to terminate timed-out program."); - // -2 indicates a crash or timeout as opposed to failure to execute. - return -2; +namespace llvm { +ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, + bool WaitUntilChildTerminates, std::string *ErrMsg) { + assert(PI.Pid && "invalid pid to wait on, process not started?"); + assert(PI.ProcessHandle && + "invalid process handle to wait on, process not started?"); + DWORD milliSecondsToWait = 0; + if (WaitUntilChildTerminates) + milliSecondsToWait = INFINITE; + else if (SecondsToWait > 0) + milliSecondsToWait = SecondsToWait * 1000; + + ProcessInfo WaitResult = PI; + DWORD WaitStatus = WaitForSingleObject(PI.ProcessHandle, milliSecondsToWait); + if (WaitStatus == WAIT_TIMEOUT) { + if (SecondsToWait) { + if (!TerminateProcess(PI.ProcessHandle, 1)) { + if (ErrMsg) + MakeErrMsg(ErrMsg, "Failed to terminate timed-out program."); + + // -2 indicates a crash or timeout as opposed to failure to execute. + WaitResult.ReturnCode = -2; + CloseHandle(PI.ProcessHandle); + return WaitResult; + } + WaitForSingleObject(PI.ProcessHandle, INFINITE); + CloseHandle(PI.ProcessHandle); + } else { + // Non-blocking wait. + return ProcessInfo(); } - WaitForSingleObject(hProcess, INFINITE); } // Get its exit status. DWORD status; - BOOL rc = GetExitCodeProcess(hProcess, &status); + BOOL rc = GetExitCodeProcess(PI.ProcessHandle, &status); DWORD err = GetLastError(); + CloseHandle(PI.ProcessHandle); if (!rc) { SetLastError(err); - MakeErrMsg(ErrMsg, "Failed getting status for program."); + if (ErrMsg) + MakeErrMsg(ErrMsg, "Failed getting status for program."); + // -2 indicates a crash or timeout as opposed to failure to execute. - return -2; + WaitResult.ReturnCode = -2; + return WaitResult; } if (!status) - return 0; + return WaitResult; // Pass 10(Warning) and 11(Error) to the callee as negative value. if ((status & 0xBFFF0000U) == 0x80000000U) - return (int)status; - - if (status & 0xFF) - return status & 0x7FFFFFFF; - - return 1; -} - -static int Wait(void *&Data, StringRef Program, unsigned secondsToWait, - std::string *ErrMsg) { - Win32ProcessInfo *wpi = reinterpret_cast<Win32ProcessInfo *>(Data); - int Ret = WaitAux(wpi, secondsToWait, ErrMsg); - - CloseHandle(wpi->hProcess); - delete wpi; - Data = 0; + WaitResult.ReturnCode = static_cast<int>(status); + else if (status & 0xFF) + WaitResult.ReturnCode = status & 0x7FFFFFFF; + else + WaitResult.ReturnCode = 1; - return Ret; + return WaitResult; } -namespace llvm { error_code sys::ChangeStdinToBinary(){ int result = _setmode( _fileno(stdin), _O_BINARY ); if (result == -1) @@ -449,5 +459,4 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { } return true; } - } diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc index 9593923..c431844 100644 --- a/lib/Support/Windows/RWMutex.inc +++ b/lib/Support/Windows/RWMutex.inc @@ -48,8 +48,7 @@ static bool loadSRW() { if (!sChecked) { sChecked = true; - HMODULE hLib = ::LoadLibrary(TEXT("Kernel32")); - if (hLib) { + if (HMODULE hLib = ::GetModuleHandleW(L"Kernel32.dll")) { fpInitializeSRWLock = (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, "InitializeSRWLock"); @@ -65,7 +64,6 @@ static bool loadSRW() { fpReleaseSRWLockShared = (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, "ReleaseSRWLockShared"); - ::FreeLibrary(hLib); if (fpInitializeSRWLock != NULL) { sHasSRW = true; diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index bce83b9..4b40d51 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -135,7 +135,7 @@ typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64); static fpSymFunctionTableAccess64 SymFunctionTableAccess64; static bool load64BitDebugHelp(void) { - HMODULE hLib = ::LoadLibrary("Dbghelp.dll"); + HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll"); if (hLib) { StackWalk64 = (fpStackWalk64) ::GetProcAddress(hLib, "StackWalk64"); diff --git a/lib/Support/Windows/TimeValue.inc b/lib/Support/Windows/TimeValue.inc index 96f5579..98b07d6 100644 --- a/lib/Support/Windows/TimeValue.inc +++ b/lib/Support/Windows/TimeValue.inc @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #include "Windows.h" +#include <cctype> #include <time.h> -namespace llvm { -using namespace sys; +using namespace llvm; +using namespace llvm::sys; //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only Win32 specific code. @@ -49,13 +50,10 @@ std::string TimeValue::str() const { char Buffer[25]; // FIXME: the windows version of strftime doesn't support %e strftime(Buffer, 25, "%b %d %H:%M %Y", LT); - assert((Buffer[3] == ' ' && isdigit(Buffer[5]) && Buffer[6] == ' ') || + assert((Buffer[3] == ' ' && isdigit(Buffer[5]) && Buffer[6] == ' ') && "Unexpected format in strftime()!"); // Emulate %e on %d to mute '0'. if (Buffer[4] == '0') Buffer[4] = ' '; return std::string(Buffer); } - - -} diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h index 4cdac78..1f3417d 100644 --- a/lib/Support/Windows/Windows.h +++ b/lib/Support/Windows/Windows.h @@ -24,23 +24,31 @@ #define _WIN32_IE 0x0600 // MinGW at it again. #define WIN32_LEAN_AND_MEAN +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" // Get build system configuration settings #include "llvm/Support/Compiler.h" +#include "llvm/Support/system_error.h" #include <windows.h> #include <wincrypt.h> -#include <shlobj.h> #include <cassert> #include <string> +#include <vector> inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { if (!ErrMsg) return true; char *buffer = NULL; - FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM, - NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL); - *ErrMsg = prefix + buffer; + DWORD R = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM, + NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL); + if (R) + *ErrMsg = prefix + buffer; + else + *ErrMsg = prefix + "Unknown error"; + LocalFree(buffer); - return true; + return R != 0; } template <typename HandleTraits> @@ -148,4 +156,13 @@ c_str(SmallVectorImpl<T> &str) { str.pop_back(); return str.data(); } + +namespace sys { +namespace windows { +error_code UTF8ToUTF16(StringRef utf8, + SmallVectorImpl<wchar_t> &utf16); +error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + SmallVectorImpl<char> &utf8); +} // end namespace windows +} // end namespace sys } // end namespace llvm. diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 213f5e1..9495cd4 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -96,6 +96,15 @@ static EncodingInfo getUnicodeEncoding(StringRef Input) { namespace llvm { namespace yaml { +/// Pin the vtables to this file. +void Node::anchor() {} +void NullNode::anchor() {} +void ScalarNode::anchor() {} +void KeyValueNode::anchor() {} +void MappingNode::anchor() {} +void SequenceNode::anchor() {} +void AliasNode::anchor() {} + /// Token - A single YAML token. struct Token : ilist_node<Token> { enum TokenKind { @@ -1070,14 +1079,22 @@ bool Scanner::scanDirective() { Current = skip_while(&Scanner::skip_ns_char, Current); StringRef Name(NameStart, Current - NameStart); Current = skip_while(&Scanner::skip_s_white, Current); - + + Token T; if (Name == "YAML") { Current = skip_while(&Scanner::skip_ns_char, Current); - Token T; T.Kind = Token::TK_VersionDirective; T.Range = StringRef(Start, Current - Start); TokenQueue.push_back(T); return true; + } else if(Name == "TAG") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Current = skip_while(&Scanner::skip_s_white, Current); + Current = skip_while(&Scanner::skip_ns_char, Current); + T.Kind = Token::TK_TagDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; } return false; } @@ -1564,10 +1581,6 @@ void Stream::printError(Node *N, const Twine &Msg) { , Ranges); } -void Stream::handleYAMLDirective(const Token &t) { - // TODO: Ensure version is 1.x. -} - document_iterator Stream::begin() { if (CurrentDoc) report_fatal_error("Can only iterate over the stream once"); @@ -1588,14 +1601,59 @@ void Stream::skip() { i->skip(); } -Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A) +Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A, StringRef T) : Doc(D) , TypeID(Type) - , Anchor(A) { + , Anchor(A) + , Tag(T) { SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); SourceRange = SMRange(Start, Start); } +std::string Node::getVerbatimTag() const { + StringRef Raw = getRawTag(); + if (!Raw.empty() && Raw != "!") { + std::string Ret; + if (Raw.find_last_of('!') == 0) { + Ret = Doc->getTagMap().find("!")->second; + Ret += Raw.substr(1); + return llvm_move(Ret); + } else if (Raw.startswith("!!")) { + Ret = Doc->getTagMap().find("!!")->second; + Ret += Raw.substr(2); + return llvm_move(Ret); + } else { + StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); + std::map<StringRef, StringRef>::const_iterator It = + Doc->getTagMap().find(TagHandle); + if (It != Doc->getTagMap().end()) + Ret = It->second; + else { + Token T; + T.Kind = Token::TK_Tag; + T.Range = TagHandle; + setError(Twine("Unknown tag handle ") + TagHandle, T); + } + Ret += Raw.substr(Raw.find_last_of('!') + 1); + return llvm_move(Ret); + } + } + + switch (getType()) { + case NK_Null: + return "tag:yaml.org,2002:null"; + case NK_Scalar: + // TODO: Tag resolution. + return "tag:yaml.org,2002:str"; + case NK_Mapping: + return "tag:yaml.org,2002:map"; + case NK_Sequence: + return "tag:yaml.org,2002:seq"; + } + + return ""; +} + Token &Node::peekNext() { return Doc->peekNext(); } @@ -1999,6 +2057,10 @@ void SequenceNode::increment() { } Document::Document(Stream &S) : stream(S), Root(0) { + // Tag maps starts with two default mappings. + TagMap["!"] = "!"; + TagMap["!!"] = "tag:yaml.org,2002:"; + if (parseDirectives()) expectToken(Token::TK_DocumentStart); Token &T = peekNext(); @@ -2042,6 +2104,7 @@ Node *Document::parseBlockNode() { Token T = peekNext(); // Handle properties. Token AnchorInfo; + Token TagInfo; parse_property: switch (T.Kind) { case Token::TK_Alias: @@ -2056,7 +2119,11 @@ parse_property: T = peekNext(); goto parse_property; case Token::TK_Tag: - getNext(); // Skip TK_Tag. + if (TagInfo.Kind == Token::TK_Tag) { + setError("Already encountered a tag for this node!", T); + return 0; + } + TagInfo = getNext(); // Consume TK_Tag. T = peekNext(); goto parse_property; default: @@ -2070,42 +2137,49 @@ parse_property: // Don't eat the TK_BlockEntry, SequenceNode needs it. return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Indentless); case Token::TK_BlockSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Block); case Token::TK_BlockMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Block); case Token::TK_FlowSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Flow); case Token::TK_FlowMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Flow); case Token::TK_Scalar: getNext(); return new (NodeAllocator) ScalarNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , T.Range); case Token::TK_Key: // Don't eat the TK_Key, KeyValueNode expects it. return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Inline); case Token::TK_DocumentStart: case Token::TK_DocumentEnd: @@ -2126,10 +2200,10 @@ bool Document::parseDirectives() { while (true) { Token T = peekNext(); if (T.Kind == Token::TK_TagDirective) { - handleTagDirective(getNext()); + parseTAGDirective(); isDirective = true; } else if (T.Kind == Token::TK_VersionDirective) { - stream.handleYAMLDirective(getNext()); + parseYAMLDirective(); isDirective = true; } else break; @@ -2137,6 +2211,21 @@ bool Document::parseDirectives() { return isDirective; } +void Document::parseYAMLDirective() { + getNext(); // Eat %YAML <version> +} + +void Document::parseTAGDirective() { + Token Tag = getNext(); // %TAG <handle> <prefix> + StringRef T = Tag.Range; + // Strip %TAG + T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); + std::size_t HandleEnd = T.find_first_of(" \t"); + StringRef TagHandle = T.substr(0, HandleEnd); + StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); + TagMap[TagHandle] = TagPrefix; +} + bool Document::expectToken(int TK) { Token T = getNext(); if (T.Kind != TK) { diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index b0cd415..42bff96 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include <cstring> +#include <cctype> using namespace llvm; using namespace yaml; @@ -40,32 +41,43 @@ void IO::setContext(void *Context) { // Input //===----------------------------------------------------------------------===// -Input::Input(StringRef InputContent, void *Ctxt) - : IO(Ctxt), +Input::Input(StringRef InputContent, + void *Ctxt, + SourceMgr::DiagHandlerTy DiagHandler, + void *DiagHandlerCtxt) + : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr)), CurrentNode(NULL) { + if (DiagHandler) + SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); DocIterator = Strm->begin(); } Input::~Input() { - } error_code Input::error() { return EC; } -void Input::setDiagHandler(SourceMgr::DiagHandlerTy Handler, void *Ctxt) { - SrcMgr.setDiagHandler(Handler, Ctxt); -} +// Pin the vtables to this file. +void Input::HNode::anchor() {} +void Input::EmptyHNode::anchor() {} +void Input::ScalarHNode::anchor() {} -bool Input::outputting() { +bool Input::outputting() const { return false; } bool Input::setCurrentDocument() { if (DocIterator != Strm->end()) { Node *N = DocIterator->getRoot(); + if (!N) { + assert(Strm->failed() && "Root is NULL iff parsing failed"); + EC = make_error_code(errc::invalid_argument); + return false; + } + if (isa<NullNode>(N)) { // Empty files are allowed and ignored ++DocIterator; @@ -82,10 +94,21 @@ void Input::nextDocument() { ++DocIterator; } +bool Input::mapTag(StringRef Tag, bool Default) { + std::string foundTag = CurrentNode->_node->getVerbatimTag(); + if (foundTag.empty()) { + // If no tag found and 'Tag' is the default, say it was found. + return Default; + } + // Return true iff found tag matches supplied tag. + return Tag.equals(foundTag); +} + void Input::beginMapping() { if (EC) return; - MapHNode *MN = dyn_cast<MapHNode>(CurrentNode); + // CurrentNode can be null if the document is empty. + MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode); if (MN) { MN->ValidKeys.clear(); } @@ -96,6 +119,15 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault, UseDefault = false; if (EC) return false; + + // CurrentNode is null for empty documents, which is an error in case required + // nodes are present. + if (!CurrentNode) { + if (Required) + EC = make_error_code(errc::invalid_argument); + return false; + } + MapHNode *MN = dyn_cast<MapHNode>(CurrentNode); if (!MN) { setError(CurrentNode, "not a mapping"); @@ -122,7 +154,8 @@ void Input::postflightKey(void *saveInfo) { void Input::endMapping() { if (EC) return; - MapHNode *MN = dyn_cast<MapHNode>(CurrentNode); + // CurrentNode can be null if the document is empty. + MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode); if (!MN) return; for (MapHNode::NameToNode::iterator i = MN->Mapping.begin(), @@ -263,6 +296,7 @@ void Input::scalarString(StringRef &S) { } void Input::setError(HNode *hnode, const Twine &message) { + assert(hnode && "HNode must not be NULL"); this->setError(hnode->_node, message); } @@ -334,6 +368,10 @@ void Input::setError(const Twine &Message) { this->setError(CurrentNode, Message); } +bool Input::canElideEmptySequence() { + return false; +} + Input::MapHNode::~MapHNode() { for (MapHNode::NameToNode::iterator i = Mapping.begin(), End = Mapping.end(); i != End; ++i) { @@ -368,7 +406,7 @@ Output::Output(raw_ostream &yout, void *context) Output::~Output() { } -bool Output::outputting() { +bool Output::outputting() const { return true; } @@ -377,6 +415,14 @@ void Output::beginMapping() { NeedsNewLine = true; } +bool Output::mapTag(StringRef Tag, bool Use) { + if (Use) { + this->output(" "); + this->output(Tag); + } + return Use; +} + void Output::endMapping() { StateStack.pop_back(); } @@ -505,9 +551,20 @@ void Output::endBitSetScalar() { } void Output::scalarString(StringRef &S) { + const char ScalarSafeChars[] = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/^., \t"; + this->newLineCheck(); - if (S.find('\n') == StringRef::npos) { - // No embedded new-line chars, just print string. + if (S.empty()) { + // Print '' for the empty string because leaving the field empty is not + // allowed. + this->outputUpToEndOfLine("''"); + return; + } + if (S.find_first_not_of(ScalarSafeChars) == StringRef::npos && + !isspace(S.front()) && !isspace(S.back())) { + // If the string consists only of safe characters, print it out without + // quotes. this->outputUpToEndOfLine(S); return; } @@ -532,6 +589,19 @@ void Output::scalarString(StringRef &S) { void Output::setError(const Twine &message) { } +bool Output::canElideEmptySequence() { + // Normally, with an optional key/value where the value is an empty sequence, + // the whole key/value can be not written. But, that produces wrong yaml + // if the key/value is the only thing in the map and the map is used in + // a sequence. This detects if the this sequence is the first key/value + // in map that itself is embedded in a sequnce. + if (StateStack.size() < 2) + return true; + if (StateStack.back() != inMapFirstKey) + return true; + return (StateStack[StateStack.size()-2] != inSeq); +} + void Output::output(StringRef s) { Column += s.size(); Out << s; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 92fa8b5..cb96489 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -447,7 +447,8 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, error_code EC = sys::fs::openFileForWrite(Filename, FD, Flags); if (EC) { - ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; + ErrorInfo = "Error opening output file '" + std::string(Filename) + "': " + + EC.message(); ShouldClose = false; return; } |