/*
 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 * Copyright (c) 2009, Google Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#ifndef UString_h
#define UString_h

// NOTE(review): in this copy of the file every "<...>" argument list is
// missing: the header names of the eight #include directives below, and the
// template arguments after PassRefPtr, RefPtr, Vector, PtrAndFlags and the
// named casts further down. The code tokens are reproduced exactly as found;
// restore the missing arguments from version control before building.
#include "Collector.h"
#include
#include
#include
#include
#include
#include
#include
#include

namespace JSC {

using WTF::PlacementNewAdoptType;
using WTF::PlacementNewAdopt;

class IdentifierTable;

// A char buffer paired with its length. The default constructor produces a
// zero-length string whose data pointer is null.
class CString {
public:
    CString()
        : m_length(0)
        , m_data(0)
    {
    }

    CString(const char*);
    CString(const char*, size_t);
    CString(const CString&);

    ~CString();

    static CString adopt(char*, size_t); // buffer should be allocated with new[].

    CString& append(const CString&);
    CString& operator=(const char* c);
    CString& operator=(const CString&);
    // Shorthand for append().
    CString& operator+=(const CString& c) { return append(c); }

    size_t size() const { return m_length; }
    // Returns the stored pointer as-is; it is null for a default-constructed CString.
    const char* c_str() const { return m_data; }

private:
    size_t m_length;
    char* m_data;
};

// NOTE(review): the Vector template arguments are missing here - restore.
typedef Vector CStringBuffer;

// A string of UChar code units. A UString holds one Rep (m_rep); a Rep
// describes a window (offset, len) into the buffer owned by a BaseString.
class UString {
    friend class JIT;

public:
    struct BaseString;

    // Manually reference-counted string representation (see ref()/deref()).
    struct Rep : Noncopyable {
        friend class JIT;

        static PassRefPtr create(UChar*, int);
        static PassRefPtr createCopying(const UChar*, int);
        static PassRefPtr create(PassRefPtr base, int offset, int length);

        // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
        // Returns UString::Rep::null for null input or conversion failure.
        static PassRefPtr createFromUTF8(const char*);

        void destroy();

        // True when this Rep carries BaseStringFlag, i.e. baseString()
        // returns this object itself rather than m_baseString.
        bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); }
        UChar* data() const;
        int size() const { return len; }

        // Lazily computes the hash: a stored value of 0 means "not computed
        // yet", in which case it is computed from data()/len and cached.
        unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; }
        // Returns the cached hash without computing it; asserts it is non-zero.
        unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers

        static unsigned computeHash(const UChar*, int length);
        static unsigned computeHash(const char*, int length);
        // Convenience overload for NUL-terminated input; the length comes from strlen().
        static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); }

        IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); }
        // Must not be called on a static Rep (asserted).
        void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); }

        bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); }
        void setStatic(bool);
        void setBaseString(PassRefPtr);
        BaseString* baseString();
        const BaseString* baseString() const;

        // Increments the count and returns this so calls can be chained.
        Rep* ref() { ++rc; return this; }
        // Decrements the count and calls destroy() when it reaches zero.
        ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); }

        void checkConsistency() const;

        // Flags stored alongside the identifier-table pointer in
        // m_identifierTableAndFlags.
        enum UStringFlags {
            StaticFlag,
            BaseStringFlag
        };

        // unshared data
        int offset;
        int len;
        // For null and empty static strings, this field does not reflect a
        // correct count, because ref/deref are not thread-safe. A special
        // case in destroy() guarantees that these do not get deleted.
        int rc;
        mutable unsigned _hash;
        PtrAndFlags m_identifierTableAndFlags;
        void* m_baseString; // If "this" is a BaseString instance, it is 0. BaseString* otherwise.

        static BaseString& null() { return *nullBaseString; }
        static BaseString& empty() { return *emptyBaseString; }

    private:
        friend void initializeUString();
        static BaseString* nullBaseString;
        static BaseString* emptyBaseString;
    };

    // A Rep that is its own base: the constructor sets BaseStringFlag, and
    // the struct adds the character buffer plus its capacity bookkeeping.
    struct BaseString : public Rep {
        BaseString()
        {
            m_identifierTableAndFlags.setFlag(BaseStringFlag);
        }

        // potentially shared data.
        UChar* buf;
        int preCapacity;
        int usedPreCapacity;
        int capacity;
        int usedCapacity;

        // Last capacity, in bytes, that UString::cost() recorded for this buffer.
        size_t reportedCost;
    };

public:
    UString();
    UString(const char*);
    UString(const UChar*, int length);
    UString(UChar*, int length, bool copy);
    // Copies share the same Rep.
    UString(const UString& s) : m_rep(s.m_rep) { }
    UString(const Vector& buffer);
    ~UString() { }

    // Special constructor for cases where we overwrite an object in place.
    UString(PlacementNewAdoptType) : m_rep(PlacementNewAdopt) { }

    static UString from(int);
    static UString from(unsigned int);
    static UString from(long);
    static UString from(double);

    // A (position, length) pair used by spliceSubstringsWithSeparators().
    // The default constructor leaves both fields uninitialized.
    struct Range {
    public:
        Range(int pos, int len)
            : position(pos)
            , length(len)
        {
        }

        Range()
        {
        }

        int position;
        int length;
    };

    UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const;

    UString& append(const UString&);
    UString& append(const char*);
    UString& append(UChar);
    // NOTE(review): the target types of both casts are missing here - restore.
    UString& append(char c) { return append(static_cast(static_cast(c))); }
    UString& append(const UChar*, int size);

    bool getCString(CStringBuffer&) const;

    // NOTE: This method should only be used for *debugging* purposes as it
    // is neither Unicode safe nor free from side effects nor thread-safe.
    char* ascii() const;

    /**
     * Convert the string to UTF-8, assuming it is UTF-16 encoded.
     * In non-strict mode, this function is tolerant of badly formed UTF-16, it
     * can create UTF-8 strings that are invalid because they have characters in
     * the range U+D800-U+DFFF, U+FFFE, or U+FFFF, but the UTF-8 string is
     * guaranteed to be otherwise valid.
     * In strict mode, error is returned as null CString.
     */
    CString UTF8String(bool strict = false) const;

    UString& operator=(const char*c);

    UString& operator+=(const UString& s) { return append(s); }
    UString& operator+=(const char* s) { return append(s); }

    const UChar* data() const { return m_rep->data(); }

    // Null means "shares the Rep::null() representation" (pointer identity).
    bool isNull() const { return (m_rep == &Rep::null()); }
    // Empty means zero length; this is a different test from isNull().
    bool isEmpty() const { return (!m_rep->len); }

    bool is8Bit() const;

    int size() const { return m_rep->size(); }

    UChar operator[](int pos) const;

    double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
    double toDouble(bool tolerateTrailingJunk) const;
    double toDouble() const;

    uint32_t toUInt32(bool* ok = 0) const;
    uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const;
    uint32_t toStrictUInt32(bool* ok = 0) const;

    unsigned toArrayIndex(bool* ok = 0) const;

    int find(const UString& f, int pos = 0) const;
    int find(UChar, int pos = 0) const;
    int rfind(const UString& f, int pos) const;
    int rfind(UChar, int pos) const;

    UString substr(int pos = 0, int len = -1) const;

    static const UString& null() { return *nullUString; }

    Rep* rep() const { return m_rep.get(); }
    static Rep* nullRep();

    // Wraps an existing Rep; the Rep must be non-null (asserted).
    UString(PassRefPtr r)
        : m_rep(r)
    {
        ASSERT(m_rep);
    }

    size_t cost() const;

private:
    void expandCapacity(int requiredLength);
    void expandPreCapacity(int requiredPreCap);
    void makeNull();

    RefPtr m_rep;
    static UString* nullUString;

    friend void initializeUString();
    friend bool operator==(const UString&, const UString&);
    friend PassRefPtr concatenate(Rep*, Rep*); // returns 0 if out of memory
};

PassRefPtr concatenate(UString::Rep*, UString::Rep*);
PassRefPtr concatenate(UString::Rep*, int);
PassRefPtr concatenate(UString::Rep*, double);

bool operator==(const UString&, const UString&);

inline bool operator!=(const UString& s1, const UString& s2)
{
    return !JSC::operator==(s1, s2);
}

bool operator<(const UString& s1, const UString& s2);
bool operator>(const UString& s1, const UString& s2);

bool operator==(const UString& s1, const char* s2);

inline bool operator!=(const UString& s1, const char* s2)
{
    return !JSC::operator==(s1, s2);
}

// The (const char*, UString) comparisons forward to the (UString, const char*) form.
inline bool operator==(const char *s1, const UString& s2)
{
    return operator==(s2, s1);
}

inline bool operator!=(const char *s1, const UString& s2)
{
    return !JSC::operator==(s1, s2);
}

bool operator==(const CString&, const CString&);

// Concatenates two strings. If concatenate() yields a null result, the
// returned UString wraps UString::nullRep() instead.
inline UString operator+(const UString& s1, const UString& s2)
{
    RefPtr result = concatenate(s1.rep(), s2.rep());
    return UString(result ? result.release() : UString::nullRep());
}

int compare(const UString&, const UString&);

bool equal(const UString::Rep*, const UString::Rep*);

// Start of this Rep's characters: the base buffer, advanced past the base's
// preCapacity slots and then by this Rep's own offset.
inline UChar* UString::Rep::data() const
{
    const BaseString* base = baseString();
    return base->buf + base->preCapacity + offset;
}

// Sets or clears StaticFlag. Asserts that no identifier table is attached.
inline void UString::Rep::setStatic(bool v)
{
    ASSERT(!identifierTable());
    if (v)
        m_identifierTableAndFlags.setFlag(StaticFlag);
    else
        m_identifierTableAndFlags.clearFlag(StaticFlag);
}

// Stores the raw pointer obtained from base.releaseRef() in m_baseString.
// A Rep must not be set as its own base (asserted).
// NOTE(review): presumably releaseRef() hands the caller's reference over to
// this Rep - confirm against PassRefPtr.
inline void UString::Rep::setBaseString(PassRefPtr base)
{
    ASSERT(base != this);
    m_baseString = base.releaseRef();
}

// Returns this object when baseIsSelf(), otherwise the stored m_baseString.
// NOTE(review): the reinterpret_cast target type is missing here - restore.
inline UString::BaseString* UString::Rep::baseString()
{
    return reinterpret_cast(baseIsSelf() ? this : m_baseString);
}

// Const overload; forwards to the non-const baseString().
// NOTE(review): both const_cast target types are missing here - restore.
inline const UString::BaseString* UString::Rep::baseString() const
{
    return const_cast(const_cast(this)->baseString());
}

// With NDEBUG defined, checkConsistency() is an inline no-op.
#ifdef NDEBUG
inline void UString::Rep::checkConsistency() const
{
}
#endif

// A default-constructed UString shares the null Rep, so isNull() is true.
inline UString::UString()
    : m_rep(&Rep::null())
{
}

// Rule from ECMA 15.2 about what an array index is.
// Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
inline unsigned UString::toArrayIndex(bool* ok) const { unsigned i = toStrictUInt32(ok); if (ok && i >= 0xFFFFFFFFU) *ok = false; return i; } // We'd rather not do shared substring append for small strings, since // this runs too much risk of a tiny initial string holding down a // huge buffer. // FIXME: this should be size_t but that would cause warnings until we // fix UString sizes to be size_t instead of int static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar); inline size_t UString::cost() const { BaseString* base = m_rep->baseString(); size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar); size_t reportedCost = base->reportedCost; ASSERT(capacity >= reportedCost); size_t capacityDelta = capacity - reportedCost; if (capacityDelta < static_cast(minShareSize)) return 0; base->reportedCost = capacity; return capacityDelta; } struct IdentifierRepHash : PtrHash > { static unsigned hash(const RefPtr& key) { return key->computedHash(); } static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); } }; void initializeUString(); } // namespace JSC namespace WTF { template struct DefaultHash; template struct StrHash; template<> struct StrHash { static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); } static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); } static const bool safeToCompareToEmptyOrDeleted = false; }; template<> struct StrHash > : public StrHash { using StrHash::hash; static unsigned hash(const RefPtr& key) { return key->hash(); } using StrHash::equal; static bool equal(const RefPtr& a, const RefPtr& b) { return JSC::equal(a.get(), b.get()); } static bool equal(const JSC::UString::Rep* a, const RefPtr& b) { return JSC::equal(a, b.get()); } static bool equal(const RefPtr& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); } static const bool safeToCompareToEmptyOrDeleted = false; }; template<> struct DefaultHash { typedef StrHash 
Hash; }; template<> struct DefaultHash > { typedef StrHash > Hash; }; } // namespace WTF #endif