summaryrefslogtreecommitdiffstats
path: root/Source/JavaScriptCore/wtf/text/StringImpl.h
diff options
context:
space:
mode:
Diffstat (limited to 'Source/JavaScriptCore/wtf/text/StringImpl.h')
-rw-r--r--Source/JavaScriptCore/wtf/text/StringImpl.h408
1 files changed, 408 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/wtf/text/StringImpl.h b/Source/JavaScriptCore/wtf/text/StringImpl.h
new file mode 100644
index 0000000..dc1dbb2
--- /dev/null
+++ b/Source/JavaScriptCore/wtf/text/StringImpl.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2009 Google Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef StringImpl_h
+#define StringImpl_h
+
+#include <limits.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/CrossThreadRefCounted.h>
+#include <wtf/Forward.h>
+#include <wtf/OwnFastMallocPtr.h>
+#include <wtf/StdLibExtras.h>
+#include <wtf/StringHasher.h>
+#include <wtf/Vector.h>
+#include <wtf/text/StringImplBase.h>
+#include <wtf/unicode/Unicode.h>
+
+#if PLATFORM(CF)
+typedef const struct __CFString * CFStringRef;
+#endif
+
+#ifdef __OBJC__
+@class NSString;
+#endif
+
+// FIXME: This is a temporary layering violation while we move string code to WTF.
+// Landing the file moves in one patch, will follow on with patches to change the namespaces.
+namespace JSC {
+struct IdentifierCStringTranslator;
+struct IdentifierUCharBufferTranslator;
+}
+
+namespace WTF {
+
+struct CStringTranslator;
+struct HashAndCharactersTranslator;
+struct HashAndUTF8CharactersTranslator;
+struct UCharBufferTranslator;
+
+enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
+
+typedef OwnFastMallocPtr<const UChar> SharableUChar;
+typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
+typedef bool (*CharacterMatchFunctionPtr)(UChar);
+
+class StringImpl : public StringImplBase {
+ friend struct JSC::IdentifierCStringTranslator;
+ friend struct JSC::IdentifierUCharBufferTranslator;
+ friend struct WTF::CStringTranslator;
+ friend struct WTF::HashAndCharactersTranslator;
+ friend struct WTF::HashAndUTF8CharactersTranslator;
+ friend struct WTF::UCharBufferTranslator;
+ friend class AtomicStringImpl;
+private:
+ // Used to construct static strings, which have an special refCount that can never hit zero.
+ // This means that the static string will never be destroyed, which is important because
+ // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
+ StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
+ : StringImplBase(length, ConstructStaticString)
+ , m_data(characters)
+ , m_buffer(0)
+ , m_hash(0)
+ {
+ // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
+ // with impunity. The empty string is special because it is never entered into
+ // AtomicString's HashKey, but still needs to compare correctly.
+ hash();
+ }
+
+ // Create a normal string with internal storage (BufferInternal)
+ StringImpl(unsigned length)
+ : StringImplBase(length, BufferInternal)
+ , m_data(reinterpret_cast<const UChar*>(this + 1))
+ , m_buffer(0)
+ , m_hash(0)
+ {
+ ASSERT(m_data);
+ ASSERT(m_length);
+ }
+
+ // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
+ StringImpl(const UChar* characters, unsigned length)
+ : StringImplBase(length, BufferOwned)
+ , m_data(characters)
+ , m_buffer(0)
+ , m_hash(0)
+ {
+ ASSERT(m_data);
+ ASSERT(m_length);
+ }
+
+ // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
+ StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
+ : StringImplBase(length, BufferSubstring)
+ , m_data(characters)
+ , m_substringBuffer(base.leakRef())
+ , m_hash(0)
+ {
+ ASSERT(m_data);
+ ASSERT(m_length);
+ ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
+ }
+
+ // Used to construct new strings sharing an existing SharedUChar (BufferShared)
+ StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
+ : StringImplBase(length, BufferShared)
+ , m_data(characters)
+ , m_sharedBuffer(sharedBuffer.leakRef())
+ , m_hash(0)
+ {
+ ASSERT(m_data);
+ ASSERT(m_length);
+ }
+
+ // For use only by AtomicString's XXXTranslator helpers.
+ void setHash(unsigned hash)
+ {
+ ASSERT(!isStatic());
+ ASSERT(!m_hash);
+ ASSERT(hash == computeHash(m_data, m_length));
+ m_hash = hash;
+ }
+
+public:
+ ~StringImpl();
+
+ static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
+ static PassRefPtr<StringImpl> create(const char*, unsigned length);
+ static PassRefPtr<StringImpl> create(const char*);
+ static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
+ static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
+ {
+ ASSERT(rep);
+ ASSERT(length <= rep->length());
+
+ if (!length)
+ return empty();
+
+ StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
+ return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
+ }
+
+ static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
+ static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
+ {
+ if (!length) {
+ output = 0;
+ return empty();
+ }
+
+ if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) {
+ output = 0;
+ return 0;
+ }
+ StringImpl* resultImpl;
+ if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) {
+ output = 0;
+ return 0;
+ }
+ output = reinterpret_cast<UChar*>(resultImpl + 1);
+ return adoptRef(new(resultImpl) StringImpl(length));
+ }
+
+ static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); }
+ static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
+ static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
+
+ template<size_t inlineCapacity>
+ static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
+ {
+ if (size_t size = vector.size()) {
+ ASSERT(vector.data());
+ if (size > std::numeric_limits<unsigned>::max())
+ CRASH();
+ return adoptRef(new StringImpl(vector.releaseBuffer(), size));
+ }
+ return empty();
+ }
+ static PassRefPtr<StringImpl> adopt(StringBuffer&);
+
+ SharedUChar* sharedBuffer();
+ const UChar* characters() const { return m_data; }
+
+ size_t cost()
+ {
+ // For substrings, return the cost of the base string.
+ if (bufferOwnership() == BufferSubstring)
+ return m_substringBuffer->cost();
+
+ if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
+ m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
+ return m_length;
+ }
+ return 0;
+ }
+
+ bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
+ void setIsIdentifier(bool isIdentifier)
+ {
+ ASSERT(!isStatic());
+ if (isIdentifier)
+ m_refCountAndFlags |= s_refCountFlagIsIdentifier;
+ else
+ m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
+ }
+
+ bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
+
+ bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; }
+ void setIsAtomic(bool isIdentifier)
+ {
+ ASSERT(!isStatic());
+ if (isIdentifier)
+ m_refCountAndFlags |= s_refCountFlagIsAtomic;
+ else
+ m_refCountAndFlags &= ~s_refCountFlagIsAtomic;
+ }
+
+ unsigned hash() const { if (!m_hash) m_hash = computeHash(m_data, m_length); return m_hash; }
+ unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
+ static unsigned computeHash(const UChar* data, unsigned length) { return WTF::StringHasher::createHash<UChar>(data, length); }
+ static unsigned computeHash(const char* data, unsigned length) { return WTF::StringHasher::createHash<char>(data, length); }
+ static unsigned computeHash(const char* data) { return WTF::StringHasher::createHash<char>(data); }
+
+ ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; }
+ ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
+
+ static StringImpl* empty();
+
+ static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
+ {
+ if (numCharacters <= s_copyCharsInlineCutOff) {
+ for (unsigned i = 0; i < numCharacters; ++i)
+ destination[i] = source[i];
+ } else
+ memcpy(destination, source, numCharacters * sizeof(UChar));
+ }
+
+ // Returns a StringImpl suitable for use on another thread.
+ PassRefPtr<StringImpl> crossThreadString();
+ // Makes a deep copy. Helpful only if you need to use a String on another thread
+ // (use crossThreadString if the method call doesn't need to be threadsafe).
+ // Since StringImpl objects are immutable, there's no other reason to make a copy.
+ PassRefPtr<StringImpl> threadsafeCopy() const;
+
+ PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
+
+ UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
+ UChar32 characterStartingAt(unsigned);
+
+ bool containsOnlyWhitespace();
+
+ int toIntStrict(bool* ok = 0, int base = 10);
+ unsigned toUIntStrict(bool* ok = 0, int base = 10);
+ int64_t toInt64Strict(bool* ok = 0, int base = 10);
+ uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
+ intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
+
+ int toInt(bool* ok = 0); // ignores trailing garbage
+ unsigned toUInt(bool* ok = 0); // ignores trailing garbage
+ int64_t toInt64(bool* ok = 0); // ignores trailing garbage
+ uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
+ intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
+
+ double toDouble(bool* ok = 0);
+ float toFloat(bool* ok = 0);
+
+ PassRefPtr<StringImpl> lower();
+ PassRefPtr<StringImpl> upper();
+
+ enum LastCharacterBehavior { ObscureLastCharacter, DisplayLastCharacter };
+
+ PassRefPtr<StringImpl> secure(UChar, LastCharacterBehavior = ObscureLastCharacter);
+ PassRefPtr<StringImpl> foldCase();
+
+ PassRefPtr<StringImpl> stripWhiteSpace();
+ PassRefPtr<StringImpl> simplifyWhiteSpace();
+
+ PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
+
+ size_t find(UChar, unsigned index = 0);
+ size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
+ size_t find(const char*, unsigned index = 0);
+ size_t find(StringImpl*, unsigned index = 0);
+ size_t findIgnoringCase(const char*, unsigned index = 0);
+ size_t findIgnoringCase(StringImpl*, unsigned index = 0);
+
+ size_t reverseFind(UChar, unsigned index = UINT_MAX);
+ size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
+ size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
+
+ bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
+ bool endsWith(StringImpl*, bool caseSensitive = true);
+
+ PassRefPtr<StringImpl> replace(UChar, UChar);
+ PassRefPtr<StringImpl> replace(UChar, StringImpl*);
+ PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
+ PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
+
+ WTF::Unicode::Direction defaultWritingDirection();
+
+#if PLATFORM(CF)
+ CFStringRef createCFString();
+#endif
+#ifdef __OBJC__
+ operator NSString*();
+#endif
+
+private:
+ // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
+ static const unsigned s_copyCharsInlineCutOff = 20;
+
+ static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
+
+ BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
+ bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
+ const UChar* m_data;
+ union {
+ void* m_buffer;
+ StringImpl* m_substringBuffer;
+ SharedUChar* m_sharedBuffer;
+ };
+ mutable unsigned m_hash;
+};
+
+bool equal(const StringImpl*, const StringImpl*);
+bool equal(const StringImpl*, const char*);
+inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
+
+bool equalIgnoringCase(StringImpl*, StringImpl*);
+bool equalIgnoringCase(StringImpl*, const char*);
+inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
+bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
+inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
+
+bool equalIgnoringNullity(StringImpl*, StringImpl*);
+
+int codePointCompare(const StringImpl*, const StringImpl*);
+
+static inline bool isSpaceOrNewline(UChar c)
+{
+ // Use isASCIISpace() for basic Latin-1.
+ // This will include newlines, which aren't included in Unicode DirWS.
+ return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
+}
+
+// This is a hot function because it's used when parsing HTML.
+inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
+{
+ ASSERT(characters);
+ ASSERT(length);
+
+ // Optimize for the case where there are no Null characters by quickly
+ // searching for nulls, and then using StringImpl::create, which will
+ // memcpy the whole buffer. This is faster than assigning character by
+ // character during the loop.
+
+ // Fast case.
+ int foundNull = 0;
+ for (unsigned i = 0; !foundNull && i < length; i++) {
+ int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
+ foundNull |= !c;
+ }
+ if (!foundNull)
+ return StringImpl::create(characters, length);
+
+ return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
+}
+
+struct StringHash;
+
+// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
+template<typename T> struct DefaultHash;
+template<> struct DefaultHash<StringImpl*> {
+ typedef StringHash Hash;
+};
+template<> struct DefaultHash<RefPtr<StringImpl> > {
+ typedef StringHash Hash;
+};
+
+}
+
+using WTF::StringImpl;
+using WTF::equal;
+using WTF::TextCaseSensitivity;
+using WTF::TextCaseSensitive;
+using WTF::TextCaseInsensitive;
+
+#endif