summary refs log tree commit diff stats
path: root/Source/JavaScriptCore/wtf/unicode/UTF8.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/JavaScriptCore/wtf/unicode/UTF8.cpp')
-rw-r--r-- Source/JavaScriptCore/wtf/unicode/UTF8.cpp 33
1 files changed, 25 insertions, 8 deletions
diff --git a/Source/JavaScriptCore/wtf/unicode/UTF8.cpp b/Source/JavaScriptCore/wtf/unicode/UTF8.cpp
index dc24ed5..4c3738b 100644
--- a/Source/JavaScriptCore/wtf/unicode/UTF8.cpp
+++ b/Source/JavaScriptCore/wtf/unicode/UTF8.cpp
@@ -26,16 +26,14 @@
#include "config.h"
#include "UTF8.h"
-#include <wtf/StringHasher.h>
#include "ASCIICType.h"
+#include <wtf/StringHasher.h>
+#include <wtf/unicode/CharacterNames.h>
namespace WTF {
namespace Unicode {
-// FIXME: Use definition from CharacterNames.h.
-static const UChar replacementCharacter = 0xFFFD;
-
inline int inlineUTF8SequenceLengthNonASCII(char b0)
{
if ((b0 & 0xC0) != 0xC0)
@@ -316,25 +314,33 @@ ConversionResult convertUTF8ToUTF16(
return result;
}
-unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length)
+static inline unsigned calculateStringHashAndLengthFromUTF8Internal(const char* data, const char* dataEnd, unsigned& dataLength, unsigned& utf16Length)
{
if (!data)
return 0;
WTF::StringHasher stringHasher;
+ dataLength = 0;
utf16Length = 0;
- while (data < dataEnd) {
+ while (data < dataEnd || (!dataEnd && *data)) {
if (isASCII(*data)) {
stringHasher.addCharacter(*data++);
+ dataLength++;
utf16Length++;
continue;
}
int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data);
+ dataLength += utf8SequenceLength;
- if (dataEnd - data < utf8SequenceLength)
- return false;
+ if (!dataEnd) {
+ for (int i = 1; i < utf8SequenceLength; ++i) {
+ if (!data[i])
+ return 0;
+ }
+ } else if (dataEnd - data < utf8SequenceLength)
+ return 0;
if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength))
return 0;
@@ -359,6 +365,17 @@ unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsi
return stringHasher.hash();
}
+unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length) // Public wrapper: hashes the UTF-8 input bounded by dataEnd and reports its UTF-16 length through utf16Length.
+{
+ unsigned dataLength; // The shared helper requires this out-parameter; the value it writes is discarded here.
+ return calculateStringHashAndLengthFromUTF8Internal(data, dataEnd, dataLength, utf16Length); // The helper returns 0 when data is null or the input fails its UTF-8 checks.
+}
+
+unsigned calculateStringHashAndLengthFromUTF8(const char* data, unsigned& dataLength, unsigned& utf16Length) // Public wrapper for null-terminated input: returns the hash and reports counts through dataLength and utf16Length.
+{
+ return calculateStringHashAndLengthFromUTF8Internal(data, 0, dataLength, utf16Length); // A dataEnd of 0 makes the helper scan until the terminating null byte instead of an end pointer.
+}
+
bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd)
{
while (b < bEnd) {