6 files changed, 138 insertions, 25 deletions
diff --git a/JavaScriptCore/wtf/text/AtomicString.cpp b/JavaScriptCore/wtf/text/AtomicString.cpp
index c49a837..acbcd34 100644
--- a/JavaScriptCore/wtf/text/AtomicString.cpp
+++ b/JavaScriptCore/wtf/text/AtomicString.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -26,9 +27,12 @@
 #include <wtf/HashSet.h>
 #include <wtf/Threading.h>
 #include <wtf/WTFThreadData.h>
+#include <wtf/unicode/UTF8.h>
 
 namespace WTF {
 
+using namespace Unicode;
+
 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
 
 class AtomicStringTable {
@@ -85,7 +89,7 @@ struct CStringTranslator {
             if (d[i] != c)
                 return false;
         }
-        return s[length] == 0;
+        return !s[length];
     }
 
     static void translate(StringImpl*& location, const char* const& c, unsigned hash)
@@ -206,12 +210,44 @@ struct HashAndCharactersTranslator {
     }
 };
 
+struct HashAndUTF8Characters { // Lookup key used by AtomicString::fromUTF8() when adding a UTF-8 buffer to the string table.
+    unsigned hash; // Hash computed up front by the caller (see calculateStringHashFromUTF8() in fromUTF8()).
+    const char* characters; // Start of the caller's UTF-8 buffer.
+    unsigned length; // Length of the UTF-8 buffer in chars; characters + length is used as the end pointer.
+    unsigned utf16Length; // Number of UChars allocated for the new StringImpl in translate(); presumably filled in by calculateStringHashFromUTF8().
+};
+
+struct HashAndUTF8CharactersTranslator { // hash/equal/translate policy passed to stringTable().add<>() for HashAndUTF8Characters keys.
+    static unsigned hash(const HashAndUTF8Characters& buffer)
+    {
+        return buffer.hash; // Reuse the hash stored in the key instead of recomputing it.
+    }
+
+    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
+    {
+        return equalUTF16WithUTF8(string->characters(), string->characters() + string->length(), buffer.characters, buffer.characters + buffer.length); // Both sides are passed as [begin, end) pointer ranges.
+    }
+
+    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
+    {
+        UChar* target;
+        location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef(); // Presumably points target at the new string's writable storage.
+
+        const char* source = buffer.characters;
+        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
+            ASSERT_NOT_REACHED(); // NOTE(review): failure is only asserted here; presumably fromUTF8() has already rejected invalid input via its zero-hash check - confirm.
+
+        location->setHash(hash);
+        location->setIsAtomic(true);
+    }
+};
+
 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
 {
     if (!s)
         return 0;
 
-    if (length == 0)
+    if (!length)
         return StringImpl::empty();
     
     UCharBuffer buf = { s, length }; 
@@ -227,7 +263,7 @@ PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsign
     ASSERT(s);
     ASSERT(existingHash);
 
-    if (length == 0)
+    if (!length)
         return StringImpl::empty();
     
     HashAndCharacters buffer = { existingHash, s, length }; 
@@ -246,7 +282,7 @@ PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
     while (s[length] != UChar(0))
         length++;
 
-    if (length == 0)
+    if (!length)
         return StringImpl::empty();
 
     UCharBuffer buf = {s, length}; 
@@ -262,7 +298,7 @@ PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
     if (!r || r->isAtomic())
         return r;
 
-    if (r->length() == 0)
+    if (!r->length())
         return StringImpl::empty();
 
     StringImpl* result = *stringTable().add(r).first;
@@ -276,7 +312,7 @@ AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned e
     ASSERT(s);
     ASSERT(existingHash);
 
-    if (length == 0)
+    if (!length)
         return static_cast<AtomicStringImpl*>(StringImpl::empty());
 
     HashAndCharacters buffer = { existingHash, s, length }; 
@@ -290,7 +326,7 @@ void AtomicString::remove(StringImpl* r)
 {
     stringTable().remove(r);
 }
-    
+
 AtomicString AtomicString::lower() const
 {
     // Note: This is a hot function in the Dromaeo benchmark.
@@ -303,4 +339,36 @@ AtomicString AtomicString::lower() const
     return AtomicString(newImpl);
 }
 
+AtomicString AtomicString::fromUTF8(const char* characters, size_t length) // Builds an AtomicString from a UTF-8 buffer of the given length.
+{
+    if (!characters)
+        return AtomicString(); // Null pointer in, default-constructed AtomicString out.
+
+    if (!length)
+        return emptyAtom;
+
+    HashAndUTF8Characters buffer;
+    buffer.characters = characters;
+    buffer.length = length; // NOTE(review): size_t is narrowed to unsigned here; a length that does not fit in unsigned would be truncated.
+    buffer.hash = calculateStringHashFromUTF8(characters, characters + length, buffer.utf16Length);
+
+    if (!buffer.hash)
+        return AtomicString(); // A zero hash is treated as failure; presumably this is the invalid-UTF-8 case described in AtomicString.h - confirm.
+
+    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
+
+    // If the string is newly-translated, then we need to adopt it.
+    // The boolean in the pair tells us if that is so.
+    AtomicString atomicString;
+    atomicString.m_string = addResult.second ? adoptRef(*addResult.first) : *addResult.first;
+    return atomicString;
 }
+
+AtomicString AtomicString::fromUTF8(const char* characters) // Overload for NUL-terminated input; the length is taken with strlen().
+{
+    if (!characters)
+        return AtomicString(); // Checked here so strlen() below is never called on a null pointer.
+    return fromUTF8(characters, strlen(characters));
+}
+
+} // namespace WTF
diff --git a/JavaScriptCore/wtf/text/AtomicString.h b/JavaScriptCore/wtf/text/AtomicString.h
index 06e63f4..ab5b366 100644
--- a/JavaScriptCore/wtf/text/AtomicString.h
+++ b/JavaScriptCore/wtf/text/AtomicString.h
@@ -108,6 +108,11 @@ public:
     operator QString() const { return m_string; }
 #endif
 
+    // AtomicString::fromUTF8 will return a null string if
+    // the input data contains invalid UTF-8 sequences.
+    static AtomicString fromUTF8(const char*, size_t);
+    static AtomicString fromUTF8(const char*);
+
 private:
     String m_string;
     
diff --git a/JavaScriptCore/wtf/text/CString.cpp b/JavaScriptCore/wtf/text/CString.cpp
index db6443f..981d77a 100644
--- a/JavaScriptCore/wtf/text/CString.cpp
+++ b/JavaScriptCore/wtf/text/CString.cpp
@@ -49,8 +49,11 @@ void CString::init(const char* str, size_t length)
     if (!str)
         return;
 
-    if (length >= numeric_limits<size_t>::max())
-        CRASH();
+    // We need to be sure we can add 1 to length without overflowing.
+    // Since the passed-in length is the length of an actual existing
+    // string, and we know the string doesn't occupy the entire address
+    // space, we can assert here and there's no need for a runtime check.
+    ASSERT(length < numeric_limits<size_t>::max());
 
     m_buffer = CStringBuffer::create(length + 1);
     memcpy(m_buffer->mutableData(), str, length); 
diff --git a/JavaScriptCore/wtf/text/StringConcatenate.h b/JavaScriptCore/wtf/text/StringConcatenate.h
index b54a108..92a2d06 100644
--- a/JavaScriptCore/wtf/text/StringConcatenate.h
+++ b/JavaScriptCore/wtf/text/StringConcatenate.h
@@ -68,7 +68,7 @@ template<>
 class StringTypeAdapter<char*> {
 public:
     StringTypeAdapter<char*>(char* buffer)
-        : m_buffer((unsigned char*)buffer)
+        : m_buffer(buffer)
         , m_length(strlen(buffer))
     {
     }
@@ -77,12 +77,14 @@ public:
 
     void writeTo(UChar* destination)
     {
-        for (unsigned i = 0; i < m_length; ++i)
-            destination[i] = m_buffer[i];
+        for (unsigned i = 0; i < m_length; ++i) {
+            unsigned char c = m_buffer[i];
+            destination[i] = c;
+        }
     }
 
 private:
-    const unsigned char* m_buffer;
+    const char* m_buffer;
     unsigned m_length;
 };
 
@@ -90,7 +92,7 @@ template<>
 class StringTypeAdapter<const char*> {
 public:
     StringTypeAdapter<const char*>(const char* buffer)
-        : m_buffer((unsigned char*)buffer)
+        : m_buffer(buffer)
         , m_length(strlen(buffer))
     {
     }
@@ -99,35 +101,59 @@ public:
 
     void writeTo(UChar* destination)
     {
-        for (unsigned i = 0; i < m_length; ++i)
-            destination[i] = m_buffer[i];
+        for (unsigned i = 0; i < m_length; ++i) {
+            unsigned char c = m_buffer[i];
+            destination[i] = c;
+        }
     }
 
 private:
-    const unsigned char* m_buffer;
+    const char* m_buffer;
     unsigned m_length;
 };
 
 template<>
+class StringTypeAdapter<Vector<char> > { // Adapter that exposes a Vector<char> through the length()/writeTo() interface used by the other adapters in this header.
+public:
+    StringTypeAdapter<Vector<char> >(const Vector<char>& buffer)
+        : m_buffer(buffer)
+    {
+    }
+
+    size_t length() { return m_buffer.size(); }
+
+    void writeTo(UChar* destination)
+    {
+        for (size_t i = 0; i < m_buffer.size(); ++i) {
+            unsigned char c = m_buffer[i]; // Convert through unsigned char so a negative char value is not sign-extended when stored as a UChar.
+            destination[i] = c;
+        }
+    }
+
+private:
+    const Vector<char>& m_buffer; // Stored by reference, so the Vector must outlive this adapter.
+};
+
+template<>
 class StringTypeAdapter<String> {
 public:
-    StringTypeAdapter<String>(String& string)
-        : m_data(string.characters())
-        , m_length(string.length())
+    StringTypeAdapter<String>(const String& string)
+        : m_buffer(string)
     {
     }
 
-    unsigned length() { return m_length; }
+    unsigned length() { return m_buffer.length(); }
 
     void writeTo(UChar* destination)
     {
-        for (unsigned i = 0; i < m_length; ++i)
-            destination[i] = m_data[i];
+        const UChar* data = m_buffer.characters();
+        unsigned length = m_buffer.length();
+        for (unsigned i = 0; i < length; ++i)
+            destination[i] = data[i];
     }
 
 private:
-    const UChar* m_data;
-    unsigned m_length;
+    const String& m_buffer;
 };
 
 inline void sumWithOverflow(unsigned& total, unsigned addend, bool& overflow)
diff --git a/JavaScriptCore/wtf/text/StringImpl.h b/JavaScriptCore/wtf/text/StringImpl.h
index 99d0e9d..dc1dbb2 100644
--- a/JavaScriptCore/wtf/text/StringImpl.h
+++ b/JavaScriptCore/wtf/text/StringImpl.h
@@ -53,6 +53,7 @@ namespace WTF {
 
 struct CStringTranslator;
 struct HashAndCharactersTranslator;
+struct HashAndUTF8CharactersTranslator;
 struct UCharBufferTranslator;
 
 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
@@ -66,6 +67,7 @@ class StringImpl : public StringImplBase {
     friend struct JSC::IdentifierUCharBufferTranslator;
     friend struct WTF::CStringTranslator;
     friend struct WTF::HashAndCharactersTranslator;
+    friend struct WTF::HashAndUTF8CharactersTranslator;
     friend struct WTF::UCharBufferTranslator;
     friend class AtomicStringImpl;
 private:
diff --git a/JavaScriptCore/wtf/text/WTFString.h b/JavaScriptCore/wtf/text/WTFString.h
index e9d6ae4..4d853d2 100644
--- a/JavaScriptCore/wtf/text/WTFString.h
+++ b/JavaScriptCore/wtf/text/WTFString.h
@@ -276,6 +276,13 @@ public:
     // to ever prefer copy() over plain old assignment.
     String threadsafeCopy() const;
 
+    // Prevent Strings from being implicitly convertible to bool as it will be ambiguous on any platform that
+    // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*).
+    typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA);
+    typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB);
+    operator UnspecifiedBoolTypeA() const;
+    operator UnspecifiedBoolTypeB() const;
+
 #if PLATFORM(CF)
     String(CFStringRef);
     CFStringRef createCFString() const;
@@ -309,6 +316,8 @@ public:
     String(const AECHAR*);
 #endif
 
+    // String::fromUTF8 will return a null string if
+    // the input data contains invalid UTF-8 sequences.
     static String fromUTF8(const char*, size_t);
     static String fromUTF8(const char*);