diff options
Diffstat (limited to 'Source/JavaScriptCore/wtf/unicode/UTF8.cpp')
-rw-r--r-- | Source/JavaScriptCore/wtf/unicode/UTF8.cpp | 33 |
1 files changed, 25 insertions, 8 deletions
diff --git a/Source/JavaScriptCore/wtf/unicode/UTF8.cpp b/Source/JavaScriptCore/wtf/unicode/UTF8.cpp index dc24ed5..4c3738b 100644 --- a/Source/JavaScriptCore/wtf/unicode/UTF8.cpp +++ b/Source/JavaScriptCore/wtf/unicode/UTF8.cpp @@ -26,16 +26,14 @@ #include "config.h" #include "UTF8.h" -#include <wtf/StringHasher.h> #include "ASCIICType.h" +#include <wtf/StringHasher.h> +#include <wtf/unicode/CharacterNames.h> namespace WTF { namespace Unicode { -// FIXME: Use definition from CharacterNames.h. -static const UChar replacementCharacter = 0xFFFD; - inline int inlineUTF8SequenceLengthNonASCII(char b0) { if ((b0 & 0xC0) != 0xC0) @@ -316,25 +314,33 @@ ConversionResult convertUTF8ToUTF16( return result; } -unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length) +static inline unsigned calculateStringHashAndLengthFromUTF8Internal(const char* data, const char* dataEnd, unsigned& dataLength, unsigned& utf16Length) { if (!data) return 0; WTF::StringHasher stringHasher; + dataLength = 0; utf16Length = 0; - while (data < dataEnd) { + while (data < dataEnd || (!dataEnd && *data)) { if (isASCII(*data)) { stringHasher.addCharacter(*data++); + dataLength++; utf16Length++; continue; } int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data); + dataLength += utf8SequenceLength; - if (dataEnd - data < utf8SequenceLength) - return false; + if (!dataEnd) { + for (int i = 1; i < utf8SequenceLength; ++i) { + if (!data[i]) + return 0; + } + } else if (dataEnd - data < utf8SequenceLength) + return 0; if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength)) return 0; @@ -359,6 +365,17 @@ unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsi return stringHasher.hash(); } +unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length) +{ + unsigned dataLength; + return calculateStringHashAndLengthFromUTF8Internal(data, dataEnd, dataLength, utf16Length); +} + +unsigned calculateStringHashAndLengthFromUTF8(const char* data, unsigned& dataLength, unsigned& utf16Length) +{ + return calculateStringHashAndLengthFromUTF8Internal(data, 0, dataLength, utf16Length); +} + bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd) { while (b < bEnd) { |