diff options
author | Stephen Hines <srhines@google.com> | 2013-03-05 23:27:24 -0800 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2013-03-05 23:27:24 -0800 |
commit | 5adb136be579e8fff3734461580cb34d1d2983b8 (patch) | |
tree | bff1a422e9c9789df563aaf9a7e91e63e8ec0384 /lib/Support | |
parent | 227a4a4ade38716ba9eb3205f48b52910f3b955e (diff) | |
parent | b3201c5cf1e183d840f7c99ff779d57f1549d8e5 (diff) | |
download | external_llvm-5adb136be579e8fff3734461580cb34d1d2983b8.zip external_llvm-5adb136be579e8fff3734461580cb34d1d2983b8.tar.gz external_llvm-5adb136be579e8fff3734461580cb34d1d2983b8.tar.bz2 |
Merge commit 'b3201c5cf1e183d840f7c99ff779d57f1549d8e5' into merge_20130226
Conflicts:
include/llvm/Support/ELF.h
lib/Support/DeltaAlgorithm.cpp
Change-Id: I24a4fbce62eb39d924efee3c687b55e1e17b30cd
Diffstat (limited to 'lib/Support')
28 files changed, 1040 insertions, 117 deletions
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 0e3c619..5b68fbb 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -101,26 +102,6 @@ decDigitValue(unsigned int c) return c - '0'; } -static unsigned int -hexDigitValue(unsigned int c) -{ - unsigned int r; - - r = c - '0'; - if (r <= 9) - return r; - - r = c - 'A'; - if (r <= 5) - return r + 10; - - r = c - 'a'; - if (r <= 5) - return r + 10; - - return -1U; -} - /* Return the value of a decimal exponent of the form [+-]ddddddd. @@ -1932,6 +1913,12 @@ APFloat::convert(const fltSemantics &toSemantics, *losesInfo = (fs != opOK); } else if (category == fcNaN) { *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; + + // For x87 extended precision, we want to make a NaN, not a special NaN if + // the input wasn't special either. + if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) + APInt::tcSetBit(significandParts(), semantics->precision - 1); + // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) // does not give you back the same bits. This is dubious, and we // don't currently do it. You're really supposed to get @@ -3032,7 +3019,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) // Unless we have a special case, add in second double. if (category == fcNormal) { - APFloat v(APInt(64, i2)); + APFloat v(IEEEdouble, APInt(64, i2)); fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); assert(fs == opOK && !losesInfo); (void)fs; @@ -3185,27 +3172,43 @@ APFloat::initFromHalfAPInt(const APInt & api) /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful /// when the size is anything else). void -APFloat::initFromAPInt(const APInt& api, bool isIEEE) +APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) { - if (api.getBitWidth() == 16) + if (Sem == &IEEEhalf) return initFromHalfAPInt(api); - else if (api.getBitWidth() == 32) + if (Sem == &IEEEsingle) return initFromFloatAPInt(api); - else if (api.getBitWidth()==64) + if (Sem == &IEEEdouble) return initFromDoubleAPInt(api); - else if (api.getBitWidth()==80) + if (Sem == &x87DoubleExtended) return initFromF80LongDoubleAPInt(api); - else if (api.getBitWidth()==128) - return (isIEEE ? - initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api)); - else - llvm_unreachable(0); + if (Sem == &IEEEquad) + return initFromQuadrupleAPInt(api); + if (Sem == &PPCDoubleDouble) + return initFromPPCDoubleDoubleAPInt(api); + + llvm_unreachable(0); } APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) { - return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE); + switch (BitWidth) { + case 16: + return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); + case 32: + return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); + case 64: + return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); + case 80: + return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); + case 128: + if (isIEEE) + return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); + return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); + default: + llvm_unreachable("Unknown floating bit width"); + } } APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { @@ -3263,16 +3266,16 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { return Val; } -APFloat::APFloat(const APInt& api, bool isIEEE) { - initFromAPInt(api, isIEEE); +APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { + initFromAPInt(&Sem, API); } APFloat::APFloat(float f) { - initFromAPInt(APInt::floatToBits(f)); + initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); } APFloat::APFloat(double d) { - initFromAPInt(APInt::doubleToBits(d)); + initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); } namespace { @@ -3448,7 +3451,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str, AdjustToPrecision(significand, exp, FormatPrecision); - llvm::SmallVector<char, 256> buffer; + SmallVector<char, 256> buffer; // Fill the buffer. unsigned precision = significand.getBitWidth(); diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 61e503b..07cb057 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1876,6 +1876,17 @@ APInt APInt::udiv(const APInt& RHS) const { return Quotient; } +APInt APInt::sdiv(const APInt &RHS) const { + if (isNegative()) { + if (RHS.isNegative()) + return (-(*this)).udiv(-RHS); + return -((-(*this)).udiv(RHS)); + } + if (RHS.isNegative()) + return -(this->udiv(-RHS)); + return this->udiv(RHS); +} + APInt APInt::urem(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { @@ -1913,6 +1924,17 @@ APInt APInt::urem(const APInt& RHS) const { return Remainder; } +APInt APInt::srem(const APInt &RHS) const { + if (isNegative()) { + if (RHS.isNegative()) + return -((-(*this)).urem(-RHS)); + return -((-(*this)).urem(RHS)); + } + if (RHS.isNegative()) + return this->urem(-RHS); + return this->urem(RHS); +} + void APInt::udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder) { // Get some size facts about the dividend and divisor @@ -1953,6 +1975,24 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); } +void APInt::sdivrem(const APInt &LHS, const APInt &RHS, + APInt &Quotient, APInt &Remainder) { + if (LHS.isNegative()) { + if (RHS.isNegative()) + APInt::udivrem(-LHS, -RHS, Quotient, Remainder); + else { + APInt::udivrem(-LHS, RHS, Quotient, Remainder); + Quotient = -Quotient; + } + Remainder = -Remainder; + } else if (RHS.isNegative()) { + APInt::udivrem(LHS, -RHS, Quotient, Remainder); + Quotient = -Quotient; + } else { + APInt::udivrem(LHS, RHS, Quotient, Remainder); + } +} + APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { APInt Res = *this+RHS; Overflow = isNonNegative() == RHS.isNonNegative() && diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 28f4e64..3c4191b 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Memory.h" #include "llvm/Support/Recycler.h" @@ -82,6 +83,7 @@ void BumpPtrAllocator::Reset() { CurSlab->NextPtr = 0; CurPtr = (char*)(CurSlab + 1); End = ((char*)CurSlab) + CurSlab->Size; + BytesAllocated = 0; } /// Allocate - Allocate space at the specified alignment. @@ -102,6 +104,10 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { // Check if we can hold it. if (Ptr + Size <= End) { CurPtr = Ptr + Size; + // Update the allocation point of this memory block in MemorySanitizer. + // Without this, MemorySanitizer messages for values originated from here + // will point to the allocation of the entire slab. + __msan_allocated_memory(Ptr, Size); return Ptr; } @@ -117,6 +123,7 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { Ptr = AlignPtr((char*)(NewSlab + 1), Alignment); assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size); + __msan_allocated_memory(Ptr, Size); return Ptr; } @@ -125,6 +132,7 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { Ptr = AlignPtr(CurPtr, Alignment); CurPtr = Ptr + Size; assert(CurPtr <= End && "Unable to allocate memory!"); + __msan_allocated_memory(Ptr, Size); return Ptr; } diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index f294a17..5ba69fc 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -8,6 +8,8 @@ add_llvm_library(LLVMSupport circular_raw_ostream.cpp CommandLine.cpp ConstantRange.cpp + ConvertUTF.c + ConvertUTFWrapper.cpp CrashRecoveryContext.cpp DataExtractor.cpp DataStream.cpp diff --git a/lib/Support/ConvertUTF.c b/lib/Support/ConvertUTF.c new file mode 100644 index 0000000..23f17ca --- /dev/null +++ b/lib/Support/ConvertUTF.c @@ -0,0 +1,571 @@ +/*===--- ConvertUTF.c - Universal Character Names conversions ---------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=*/ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Source code file. + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Sept 2001: fixed const & error conditions per + mods suggested by S. Parent & A. Lillich. + June 2002: Tim Dodd added detection and handling of incomplete + source sequences, enhanced error detection, added casts + to eliminate compiler warnings. + July 2003: slight mods to back out aggressive FFFE detection. + Jan 2004: updated switches in from-UTF8 conversions. + Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + + See the header file "ConvertUTF.h" for complete documentation. + +------------------------------------------------------------------------ */ + + +#include "llvm/Support/ConvertUTF.h" +#ifdef CVTUTF_DEBUG +#include <stdio.h> +#endif + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +#define false 0 +#define true 1 + +/* --------------------------------------------------------------------- */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. + */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +/* --------------------------------------------------------------------- */ + +/* The interface converts a whole buffer to avoid function-call overhead. + * Constants have been gathered. Loops & conditionals have been removed as + * much as possible for efficiency, in favor of drop-through switches. + * (See "Note A" at the bottom of the file for equivalent code.) + * If your compiler supports it, the "isLegalUTF8" call can be turned + * into an inline function. + */ + + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF16 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + if (target >= targetEnd) { + result = targetExhausted; break; + } + ch = *source++; + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_LEGAL_UTF32) { + if (flags == strictConversion) { + result = sourceIllegal; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + --source; /* Back up source pointer! */ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF32 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF32* target = *targetStart; + UTF32 ch, ch2; + while (source < sourceEnd) { + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + if (target >= targetEnd) { + source = oldSource; /* Back up source pointer! */ + result = targetExhausted; break; + } + *target++ = ch; + } + *sourceStart = source; + *targetStart = target; +#ifdef CVTUTF_DEBUG +if (result == sourceIllegal) { + fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); + fflush(stderr); +} +#endif + return result; +} +ConversionResult ConvertUTF16toUTF8 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + UTF32 ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF8 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *source++; + if (flags == strictConversion ) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* + * Figure out how many bytes the result will require. Turn any + * illegally large UTF32 things (> Plane 17) into replacement chars. + */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + result = sourceIllegal; + } + + target += bytesToWrite; + if (target > targetEnd) { + --source; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns false. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. + */ + +static Boolean isLegalUTF8(const UTF8 *source, int length) { + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return false; + /* Everything else falls through when "true"... */ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return false; break; + case 0xED: if (a > 0x9F) return false; break; + case 0xF0: if (a < 0x90) return false; break; + case 0xF4: if (a > 0x8F) return false; break; + default: if (a < 0x80) return false; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) return false; + return true; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 sequence is legal or not. + * This is not used here; it's just exported. + */ +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { + int length = trailingBytesForUTF8[*source]+1; + if (length > sourceEnd - source) { + return false; + } + return isLegalUTF8(source, length); +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return the total number of bytes in a codepoint + * represented in UTF-8, given the value of the first byte. + */ +unsigned getNumBytesForUTF8(UTF8 first) { + return trailingBytesForUTF8[first] + 1; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 string is legal or not. + * This is not used here; it's just exported. + */ +Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) { + while (*source != sourceEnd) { + int length = trailingBytesForUTF8[**source] + 1; + if (length > sourceEnd - *source || !isLegalUTF8(*source, length)) + return false; + *source += length; + } + return true; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF16 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (extraBytesToRead >= sourceEnd - source) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (!isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_UTF16) { + if (flags == strictConversion) { + result = sourceIllegal; + source -= (extraBytesToRead+1); /* return to the start */ + break; /* Bail out; shouldn't continue */ + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF32 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF32* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (extraBytesToRead >= sourceEnd - source) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (!isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up the source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_LEGAL_UTF32) { + /* + * UTF-16 surrogate values are illegal in UTF-32, and anything + * over Plane 17 (> 0x10FFFF) is illegal. + */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = ch; + } + } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ + result = sourceIllegal; + *target++ = UNI_REPLACEMENT_CHAR; + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- + + Note A. + The fall-through switches in UTF-8 reading code save a + temp variable, some decrements & conditionals. The switches + are equivalent to the following loop: + { + int tmpBytesToRead = extraBytesToRead+1; + do { + ch += *source++; + --tmpBytesToRead; + if (tmpBytesToRead) ch <<= 6; + } while (tmpBytesToRead > 0); + } + In UTF-8 writing code, the switches on "bytesToWrite" are + similarly unrolled loops. + + --------------------------------------------------------------------- */ diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp new file mode 100644 index 0000000..458fbb0 --- /dev/null +++ b/lib/Support/ConvertUTFWrapper.cpp @@ -0,0 +1,76 @@ +//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----=== +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ConvertUTF.h" + +namespace llvm { + +bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, + char *&ResultPtr, const UTF8 *&ErrorPtr) { + assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4); + ConversionResult result = conversionOK; + // Copy the character span over. + if (WideCharWidth == 1) { + const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.begin()); + if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.end()))) { + result = sourceIllegal; + ErrorPtr = Pos; + } else { + memcpy(ResultPtr, Source.data(), Source.size()); + ResultPtr += Source.size(); + } + } else if (WideCharWidth == 2) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF16( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + 2*Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast<char*>(targetStart); + else + ErrorPtr = sourceStart; + } else if (WideCharWidth == 4) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF32( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + 4*Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast<char*>(targetStart); + else + ErrorPtr = sourceStart; + } + assert((result != targetExhausted) + && "ConvertUTF8toUTFXX exhausted target buffer"); + return result == conversionOK; +} + +bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) { + const UTF32 *SourceStart = &Source; + const UTF32 *SourceEnd = SourceStart + 1; + UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr); + UTF8 *TargetEnd = TargetStart + 4; + ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd, + &TargetStart, TargetEnd, + strictConversion); + if (CR != conversionOK) + return false; + + ResultPtr = reinterpret_cast<char*>(TargetStart); + return true; +} + +} // end namespace llvm + diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index 0c0f15e..d9cb8a9 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -44,7 +44,7 @@ Debug("debug", cl::desc("Enable debug output"), cl::Hidden, //until program termination. static cl::opt<unsigned> DebugBufferSize("debug-buffer-size", - cl::desc("Buffer the last N characters of debug output" + cl::desc("Buffer the last N characters of debug output " "until program termination. " "[default 0 -- immediate print-out]"), cl::Hidden, diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp index f290668..9e52874 100644 --- a/lib/Support/DeltaAlgorithm.cpp +++ b/lib/Support/DeltaAlgorithm.cpp @@ -27,15 +27,13 @@ bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) { void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) { // FIXME: Allow clients to provide heuristics for improved splitting. - // Get the iterator to the middle. - unsigned N = S.size() / 2; - changeset_ty::const_iterator middle(S.begin()); - std::advance(middle, N); - - // Create each vector using the middle as the split. - changeset_ty LHS(S.begin(), middle); - changeset_ty RHS(middle, S.end()); + // FIXME: This is really slow. + changeset_ty LHS, RHS; + unsigned idx = 0, N = S.size() / 2; + for (changeset_ty::const_iterator it = S.begin(), + ie = S.end(); it != ie; ++it, ++idx) + ((idx < N) ? LHS : RHS).insert(*it); if (!LHS.empty()) Res.push_back(LHS); if (!RHS.empty()) diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index 615efb8..0f91c11 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -688,6 +688,7 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) { /// encodings. const char *llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { + case DW_CFA_nop: return "DW_CFA_nop"; case DW_CFA_advance_loc: return "DW_CFA_advance_loc"; case DW_CFA_offset: return "DW_CFA_offset"; case DW_CFA_restore: return "DW_CFA_restore"; diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index d40439a..f14cb45 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -46,7 +46,7 @@ void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName, void *symbolValue) { SmartScopedLock<true> lock(getMutex()); if (ExplicitSymbols == 0) - ExplicitSymbols = new llvm::StringMap<void*>(); + ExplicitSymbols = new StringMap<void*>(); (*ExplicitSymbols)[symbolName] = symbolValue; } diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index fc8a2f3..4d7b239 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -87,9 +87,9 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P, // If one of the positions is at a space and the other isn't, chomp up 'til // the end of the space. - while (isspace(*F1P) && F1P != F1End) + while (isspace(static_cast<unsigned char>(*F1P)) && F1P != F1End) ++F1P; - while (isspace(*F2P) && F2P != F2End) + while (isspace(static_cast<unsigned char>(*F2P)) && F2P != F2End) ++F2P; // If we stop on numbers, compare their difference. diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index 669c238..bff182f 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -53,6 +53,17 @@ std::string llvm::DOT::EscapeString(const std::string &Label) { return Str; } +/// \brief Get a color string for this node number. Simply round-robin selects +/// from a reasonable number of colors. +StringRef llvm::DOT::getColorString(unsigned ColorNumber) { + static const int NumColors = 20; + static const char* Colors[NumColors] = { + "aaaaaa", "aa0000", "00aa00", "aa5500", "0055ff", "aa00aa", "00aaaa", + "555555", "ff5555", "55ff55", "ffff55", "5555ff", "ff55ff", "55ffff", + "ffaaaa", "aaffaa", "ffffaa", "aaaaff", "ffaaff", "aaffff"}; + return Colors[ColorNumber % NumColors]; +} + // Execute the graph viewer. Return true if successful. static bool LLVM_ATTRIBUTE_UNUSED ExecGraphViewer(const sys::Path &ExecPath, std::vector<const char*> &args, diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 5ad5308..b9bbcb9 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/Config/config.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/Debug.h" @@ -330,7 +331,10 @@ std::string sys::getHostCPUName() { case 20: return "btver1"; case 21: - return "bdver1"; + if (Model <= 15) + return "bdver1"; + else if (Model <= 31) + return "bdver2"; default: return "generic"; } @@ -578,3 +582,14 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features){ return false; } #endif + +std::string sys::getProcessTriple() { + Triple PT(LLVM_HOSTTRIPLE); + + if (sizeof(void *) == 8 && PT.isArch32Bit()) + PT = PT.get64BitArchVariant(); + if (sizeof(void *) == 4 && PT.isArch64Bit()) + PT = PT.get32BitArchVariant(); + + return PT.str(); +} diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 075d8a5..92d8b83 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -31,7 +31,7 @@ LockFileManager::readLockFile(StringRef LockFileName) { // to read, so we just return. bool Exists = false; if (sys::fs::exists(LockFileName, Exists) || !Exists) - return Optional<std::pair<std::string, int> >(); + return None; // Read the owning host and PID out of the lock file. If it appears that the // owning process is dead, the lock file is invalid. @@ -45,7 +45,7 @@ LockFileManager::readLockFile(StringRef LockFileName) { // Delete the lock file. It's invalid anyway. bool Existed; sys::fs::remove(LockFileName, Existed); - return Optional<std::pair<std::string, int> >(); + return None; } bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) { @@ -64,6 +64,7 @@ bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) { LockFileManager::LockFileManager(StringRef FileName) { + this->FileName = FileName; LockFileName = FileName; LockFileName += ".lock"; @@ -175,6 +176,7 @@ void LockFileManager::waitForUnlock() { #endif // Don't wait more than an hour for the file to appear. const unsigned MaxSeconds = 3600; + bool LockFileGone = false; do { // Sleep for the designated interval, to allow the owning process time to // finish up and remove the lock file. @@ -185,10 +187,18 @@ void LockFileManager::waitForUnlock() { #else nanosleep(&Interval, NULL); #endif - // If the file no longer exists, we're done. + // If the lock file no longer exists, wait for the actual file. bool Exists = false; - if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) - return; + if (!LockFileGone) { + if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) { + LockFileGone = true; + Exists = false; + } + } + if (LockFileGone) { + if (!sys::fs::exists(FileName.str(), Exists) && Exists) + return; + } if (!processStillExecuting((*Owner).first, (*Owner).second)) return; diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 65b4332..691b6f5 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -33,8 +33,13 @@ #include <unistd.h> #else #include <io.h> -#ifndef S_ISFIFO -#define S_ISFIFO(x) (0) +// Simplistic definitinos of these macros to allow files to be read with +// MapInFilePages. +#ifndef S_ISREG +#define S_ISREG(x) (1) +#endif +#ifndef S_ISBLK +#define S_ISBLK(x) (0) #endif #endif #include <fcntl.h> @@ -322,9 +327,10 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, return error_code(errno, posix_category()); } - // If this is a named pipe, we can't trust the size. Create the memory + // If this not a file or a block device (e.g. it's a named pipe + // or character device), we can't trust the size. Create the memory // buffer by copying off the stream. - if (S_ISFIFO(FileInfo.st_mode)) { + if (!S_ISREG(FileInfo.st_mode) && !S_ISBLK(FileInfo.st_mode)) { return getMemoryBufferForStream(FD, Filename, result); } diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index 98d7382..41add96 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -44,7 +44,8 @@ namespace { #ifdef LLVM_ON_WIN32 // C: - if (path.size() >= 2 && std::isalpha(path[0]) && path[1] == ':') + if (path.size() >= 2 && std::isalpha(static_cast<unsigned char>(path[0])) && + path[1] == ':') return path.substr(0, 2); #endif diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 6540319..fac3cad 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -15,12 +15,16 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Locale.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" using namespace llvm; +static const size_t TabStop = 8; + namespace { struct LineNoCacheTy { int LastQueryBufferID; @@ -146,7 +150,8 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { /// prefixed to the message. SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, - ArrayRef<SMRange> Ranges) const { + ArrayRef<SMRange> Ranges, + ArrayRef<SMFixIt> FixIts) const { // First thing to do: find the current buffer containing the specified // location to pull out the source line. @@ -193,6 +198,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, R.End = SMLoc::getFromPointer(LineEnd); // Translate from SMLoc ranges to column ranges. + // FIXME: Handle multibyte characters. ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart, R.End.getPointer()-LineStart)); } @@ -202,13 +208,13 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, LineAndCol.second-1, Kind, Msg.str(), - LineStr, ColRanges); + LineStr, ColRanges, FixIts); } void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, ArrayRef<SMRange> Ranges, - bool ShowColors) const { - SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges); + ArrayRef<SMFixIt> FixIts, bool ShowColors) const { + SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts); // Report the message with the diagnostic handler if present. if (DiagHandler) { @@ -231,15 +237,108 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, // SMDiagnostic Implementation //===----------------------------------------------------------------------===// -SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN, +SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line, int Col, SourceMgr::DiagKind Kind, - const std::string &Msg, - const std::string &LineStr, - ArrayRef<std::pair<unsigned,unsigned> > Ranges) + StringRef Msg, StringRef LineStr, + ArrayRef<std::pair<unsigned,unsigned> > Ranges, + ArrayRef<SMFixIt> Hints) : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind), - Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()) { + Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()), + FixIts(Hints.begin(), Hints.end()) { + std::sort(FixIts.begin(), FixIts.end()); } +static void buildFixItLine(std::string &CaretLine, std::string &FixItLine, + ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){ + if (FixIts.empty()) + return; + + const char *LineStart = SourceLine.begin(); + const char *LineEnd = SourceLine.end(); + + size_t PrevHintEndCol = 0; + + for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); + I != E; ++I) { + // If the fixit contains a newline or tab, ignore it. + if (I->getText().find_first_of("\n\r\t") != StringRef::npos) + continue; + + SMRange R = I->getRange(); + + // If the line doesn't contain any part of the range, then ignore it. + if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) + continue; + + // Translate from SMLoc to column. + // Ignore pieces of the range that go onto other lines. + // FIXME: Handle multibyte characters in the source line. + unsigned FirstCol; + if (R.Start.getPointer() < LineStart) + FirstCol = 0; + else + FirstCol = R.Start.getPointer() - LineStart; + + // If we inserted a long previous hint, push this one forwards, and add + // an extra space to show that this is not part of the previous + // completion. This is sort of the best we can do when two hints appear + // to overlap. + // + // Note that if this hint is located immediately after the previous + // hint, no space will be added, since the location is more important. + unsigned HintCol = FirstCol; + if (HintCol < PrevHintEndCol) + HintCol = PrevHintEndCol + 1; + + // FIXME: This assertion is intended to catch unintended use of multibyte + // characters in fixits. If we decide to do this, we'll have to track + // separate byte widths for the source and fixit lines. + assert((size_t)llvm::sys::locale::columnWidth(I->getText()) == + I->getText().size()); + + // This relies on one byte per column in our fixit hints. + unsigned LastColumnModified = HintCol + I->getText().size(); + if (LastColumnModified > FixItLine.size()) + FixItLine.resize(LastColumnModified, ' '); + + std::copy(I->getText().begin(), I->getText().end(), + FixItLine.begin() + HintCol); + + PrevHintEndCol = LastColumnModified; + + // For replacements, mark the removal range with '~'. + // FIXME: Handle multibyte characters in the source line. + unsigned LastCol; + if (R.End.getPointer() >= LineEnd) + LastCol = LineEnd - LineStart; + else + LastCol = R.End.getPointer() - LineStart; + + std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~'); + } +} + +static void printSourceLine(raw_ostream &S, StringRef LineContents) { + // Print out the source line one character at a time, so we can expand tabs. + for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { + if (LineContents[i] != '\t') { + S << LineContents[i]; + ++OutCol; + continue; + } + + // If we have a tab, emit at least one space, then round up to 8 columns. + do { + S << ' '; + ++OutCol; + } while ((OutCol % TabStop) != 0); + } + S << '\n'; +} + +static bool isNonASCII(char c) { + return c & 0x80; +} void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors) const { @@ -297,43 +396,48 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, if (LineNo == -1 || ColumnNo == -1) return; + // FIXME: If there are multibyte or multi-column characters in the source, all + // our ranges will be wrong. To do this properly, we'll need a byte-to-column + // map like Clang's TextDiagnostic. For now, we'll just handle tabs by + // expanding them later, and bail out rather than show incorrect ranges and + // misaligned fixits for any other odd characters. + if (std::find_if(LineContents.begin(), LineContents.end(), isNonASCII) != + LineContents.end()) { + printSourceLine(S, LineContents); + return; + } + size_t NumColumns = LineContents.size(); + // Build the line with the caret and ranges. - std::string CaretLine(LineContents.size()+1, ' '); + std::string CaretLine(NumColumns+1, ' '); // Expand any ranges. for (unsigned r = 0, e = Ranges.size(); r != e; ++r) { std::pair<unsigned, unsigned> R = Ranges[r]; - for (unsigned i = R.first, - e = std::min(R.second, (unsigned)LineContents.size()); i != e; ++i) - CaretLine[i] = '~'; + std::fill(&CaretLine[R.first], + &CaretLine[std::min((size_t)R.second, CaretLine.size())], + '~'); } - + + // Add any fix-its. + // FIXME: Find the beginning of the line properly for multibyte characters. + std::string FixItInsertionLine; + buildFixItLine(CaretLine, FixItInsertionLine, FixIts, + makeArrayRef(Loc.getPointer() - ColumnNo, + LineContents.size())); + // Finally, plop on the caret. - if (unsigned(ColumnNo) <= LineContents.size()) + if (unsigned(ColumnNo) <= NumColumns) CaretLine[ColumnNo] = '^'; else - CaretLine[LineContents.size()] = '^'; + CaretLine[NumColumns] = '^'; // ... and remove trailing whitespace so the output doesn't wrap for it. We // know that the line isn't completely empty because it has the caret in it at // least. CaretLine.erase(CaretLine.find_last_not_of(' ')+1); - // Print out the source line one character at a time, so we can expand tabs. - for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { - if (LineContents[i] != '\t') { - S << LineContents[i]; - ++OutCol; - continue; - } - - // If we have a tab, emit at least one space, then round up to 8 columns. - do { - S << ' '; - ++OutCol; - } while (OutCol & 7); - } - S << '\n'; + printSourceLine(S, LineContents); if (ShowColors) S.changeColor(raw_ostream::GREEN, true); @@ -350,11 +454,36 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, do { S << CaretLine[i]; ++OutCol; - } while (OutCol & 7); + } while ((OutCol % TabStop) != 0); } + S << '\n'; if (ShowColors) S.resetColor(); + + // Print out the replacement line, matching tabs in the source line. + if (FixItInsertionLine.empty()) + return; + for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i != e; ++i) { + if (i >= LineContents.size() || LineContents[i] != '\t') { + S << FixItInsertionLine[i]; + ++OutCol; + continue; + } + + // Okay, we have a tab. Insert the appropriate number of characters. + do { + S << FixItInsertionLine[i]; + // FIXME: This is trying not to break up replacements, but then to re-sync + // with the tabs between replacements. This will fail, though, if two + // fix-it replacements are exactly adjacent, or if a fix-it contains a + // space. Really we should be precomputing column widths, which we'll + // need anyway for multibyte chars. + if (FixItInsertionLine[i] != ' ') + ++i; + ++OutCol; + } while (((OutCol % TabStop) != 0) && i != e); + } S << '\n'; } diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp index 1a0f7bc..bd8af17 100644 --- a/lib/Support/TimeValue.cpp +++ b/lib/Support/TimeValue.cpp @@ -17,11 +17,16 @@ namespace llvm { using namespace sys; +const TimeValue::SecondsType + TimeValue::PosixZeroTimeSeconds = -946684800; +const TimeValue::SecondsType + TimeValue::Win32ZeroTimeSeconds = -12591158400ULL; + const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 ); const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 ); const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 ); -const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 ); -const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 ); +const TimeValue TimeValue::PosixZeroTime = TimeValue ( PosixZeroTimeSeconds,0 ); +const TimeValue TimeValue::Win32ZeroTime = TimeValue ( Win32ZeroTimeSeconds,0 ); void TimeValue::normalize( void ) { diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index eefb96b..d2508ac 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -19,6 +19,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { switch (Kind) { case UnknownArch: return "unknown"; + case aarch64: return "aarch64"; case arm: return "arm"; case hexagon: return "hexagon"; case mips: return "mips"; @@ -53,6 +54,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { default: return 0; + case aarch64: return "aarch64"; + case arm: case thumb: return "arm"; @@ -140,6 +143,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { case GNU: return "gnu"; case GNUEABIHF: return "gnueabihf"; case GNUEABI: return "gnueabi"; + case GNUX32: return "gnux32"; case EABI: return "eabi"; case MachO: return "macho"; case Android: return "android"; @@ -151,6 +155,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return StringSwitch<Triple::ArchType>(Name) + .Case("aarch64", aarch64) .Case("arm", arm) .Case("mips", mips) .Case("mipsel", mipsel) @@ -214,6 +219,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("powerpc", Triple::ppc) .Cases("powerpc64", "ppu", Triple::ppc64) .Case("mblaze", Triple::mblaze) + .Case("aarch64", Triple::aarch64) .Cases("arm", "xscale", Triple::arm) // FIXME: It would be good to replace these with explicit names for all the // various suffixes supported. @@ -284,6 +290,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("eabi", Triple::EABI) .StartsWith("gnueabihf", Triple::GNUEABIHF) .StartsWith("gnueabi", Triple::GNUEABI) + .StartsWith("gnux32", Triple::GNUX32) .StartsWith("gnu", Triple::GNU) .StartsWith("macho", Triple::MachO) .StartsWith("android", Triple::Android) @@ -674,6 +681,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::spir: return 32; + case llvm::Triple::aarch64: case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::nvptx64: @@ -702,6 +710,7 @@ Triple Triple::get32BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: + case Triple::aarch64: case Triple::msp430: T.setArch(UnknownArch); break; @@ -753,6 +762,7 @@ Triple Triple::get64BitArchVariant() const { T.setArch(UnknownArch); break; + case Triple::aarch64: case Triple::spir64: case Triple::mips64: case Triple::mips64el: diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 40d6b3f..e00394e 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -51,7 +51,18 @@ int getPosixProtectionFlags(unsigned Flags) { llvm::sys::Memory::MF_EXEC: return PROT_READ | PROT_WRITE | PROT_EXEC; case llvm::sys::Memory::MF_EXEC: +#if defined(__FreeBSD__) + // On PowerPC, having an executable page that has no read permission + // can have unintended consequences. The function InvalidateInstruction- + // Cache uses instructions dcbf and icbi, both of which are treated by + // the processor as loads. If the page has no read permissions, + // executing these instructions will result in a segmentation fault. + // Somehow, this problem is not present on Linux, but it does happen + // on FreeBSD. + return PROT_READ | PROT_EXEC; +#else return PROT_EXEC; +#endif default: llvm_unreachable("Illegal memory protection flag specified!"); } diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index dc43b59..c343455 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -423,11 +423,12 @@ retry_random_path: rety_open_create: int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, mode); if (RandomFD == -1) { + int SavedErrno = errno; // If the file existed, try again, otherwise, error. - if (errno == errc::file_exists) + if (SavedErrno == errc::file_exists) goto retry_random_path; // If path prefix doesn't exist, try to create it. - if (errno == errc::no_such_file_or_directory && + if (SavedErrno == errc::no_such_file_or_directory && !exists(path::parent_path(RandomPath))) { StringRef p(RandomPath); SmallString<64> dir_to_create; @@ -442,13 +443,15 @@ rety_open_create: (*i)[1] == '/' && (*i)[2] != '/') return make_error_code(errc::no_such_file_or_directory); - if (::mkdir(dir_to_create.c_str(), 0700) == -1) + if (::mkdir(dir_to_create.c_str(), 0700) == -1 && + errno != errc::file_exists) return error_code(errno, system_category()); } } goto rety_open_create; } - return error_code(errno, system_category()); + + return error_code(SavedErrno, system_category()); } // Make the path absolute. diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 1335b78..9a4454f 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -224,6 +224,8 @@ static unsigned getColumns(int FileID) { #if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H) // Try to determine the width of the terminal. struct winsize ws; + // Zero-fill ws to avoid a false positive from MemorySanitizer. + memset(&ws, 0, sizeof(ws)); if (ioctl(FileID, TIOCGWINSZ, &ws) == 0) Columns = ws.ws_col; #endif diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index c384316..117151c 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "Unix.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/FileSystem.h" #include <llvm/Config/config.h> #if HAVE_SYS_STAT_H @@ -164,12 +165,16 @@ static void SetMemoryLimits (unsigned size) setrlimit (RLIMIT_RSS, &r); #endif #ifdef RLIMIT_AS // e.g. NetBSD doesn't have it. + // Don't set virtual memory limit if built with any Sanitizer. They need 80Tb + // of virtual memory for shadow memory mapping. +#if !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_ADDRESS_SANITIZER_BUILD // Virtual memory. getrlimit (RLIMIT_AS, &r); r.rlim_cur = limit; setrlimit (RLIMIT_AS, &r); #endif #endif +#endif } bool diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 4a2496f..b8be623 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -47,17 +47,19 @@ static void (*InterruptFunction)() = 0; static std::vector<std::string> FilesToRemove; static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun; -// IntSigs - Signals that may interrupt the program at any time. +// IntSigs - Signals that represent requested termination. There's no bug +// or failure, or if there is, it's not our direct responsibility. For whatever +// reason, our continued execution is no longer desirable. static const int IntSigs[] = { - SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2 + SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2 }; static const int *const IntSigsEnd = IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]); -// KillSigs - Signals that are synchronous with the program that will cause it -// to die. +// KillSigs - Signals that represent that we have a bug, and our prompt +// termination has been ordered. static const int KillSigs[] = { - SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV + SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT #ifdef SIGSYS , SIGSYS #endif @@ -254,7 +256,7 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { // // On glibc systems we have the 'backtrace' function, which works nicely, but // doesn't demangle symbols. -static void PrintStackTrace(void *) { +void llvm::sys::PrintStackTrace(FILE *FD) { #if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) static void* StackTrace[256]; // Use backtrace() to output a backtrace on Linux systems with glibc. @@ -278,26 +280,30 @@ static void PrintStackTrace(void *) { Dl_info dlinfo; dladdr(StackTrace[i], &dlinfo); - fprintf(stderr, "%-2d", i); + fprintf(FD, "%-2d", i); const char* name = strrchr(dlinfo.dli_fname, '/'); - if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname); - else fprintf(stderr, " %-*s", width, name+1); + if (name == NULL) fprintf(FD, " %-*s", width, dlinfo.dli_fname); + else fprintf(FD, " %-*s", width, name+1); - fprintf(stderr, " %#0*lx", + fprintf(FD, " %#0*lx", (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]); if (dlinfo.dli_sname != NULL) { int res; - fputc(' ', stderr); + fputc(' ', FD); char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res); - if (d == NULL) fputs(dlinfo.dli_sname, stderr); - else fputs(d, stderr); + if (d == NULL) fputs(dlinfo.dli_sname, FD); + else fputs(d, FD); free(d); - fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr); + // FIXME: When we move to C++11, use %t length modifier. It's not in + // C++03 and causes gcc to issue warnings. Losing the upper 32 bits of + // the stack offset for a stack dump isn't likely to cause any problems. + fprintf(FD, " + %u",(unsigned)((char*)StackTrace[i]- + (char*)dlinfo.dli_saddr)); } - fputc('\n', stderr); + fputc('\n', FD); } #else backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO); @@ -305,10 +311,14 @@ static void PrintStackTrace(void *) { #endif } +static void PrintStackTraceSignalHandler(void *) { + PrintStackTrace(stderr); +} + /// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. void llvm::sys::PrintStackTraceOnErrorSignal() { - AddSignalHandler(PrintStackTrace, 0); + AddSignalHandler(PrintStackTraceSignalHandler, 0); #if defined(__APPLE__) && !defined(ANDROID) // Environment variable to disable any kind of crash dialog. diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc index 5cf5a9d..df8558b 100644 --- a/lib/Support/Unix/TimeValue.inc +++ b/lib/Support/Unix/TimeValue.inc @@ -48,7 +48,8 @@ TimeValue TimeValue::now() { } return TimeValue( - static_cast<TimeValue::SecondsType>( the_time.tv_sec + PosixZeroTime.seconds_ ), + static_cast<TimeValue::SecondsType>( the_time.tv_sec + + PosixZeroTimeSeconds ), static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec * NANOSECONDS_PER_MICROSECOND ) ); } diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 98d8a18..f4898e6 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -82,7 +82,7 @@ Path::isValid() const { pos = path.rfind(':',len); size_t rootslash = 0; if (pos != std::string::npos) { - if (pos != 1 || !isalpha(path[0]) || len < 3) + if (pos != 1 || !isalpha(static_cast<unsigned char>(path[0])) || len < 3) return false; rootslash = 2; } diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index a969753..3dd6660 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -295,6 +295,10 @@ void sys::PrintStackTraceOnErrorSignal() { LeaveCriticalSection(&CriticalSection); } +void llvm::sys::PrintStackTrace(FILE *) { + // FIXME: Implement. +} + void sys::SetInterruptFunction(void (*IF)()) { RegisterHandler(); diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 106864d..f71abd3 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -241,7 +241,8 @@ raw_ostream &raw_ostream::operator<<(double N) { if (cs == '+' || cs == '-') { int c1 = buf[len - 2]; int c0 = buf[len - 1]; - if (isdigit(c1) && isdigit(c0)) { + if (isdigit(static_cast<unsigned char>(c1)) && + isdigit(static_cast<unsigned char>(c0))) { // Trim leading '0': "...e+012" -> "...e+12\0" buf[len - 3] = c1; buf[len - 2] = c0; |