diff options
Diffstat (limited to 'JavaScriptCore/wtf/text/WTFString.cpp')
-rw-r--r-- | JavaScriptCore/wtf/text/WTFString.cpp | 960 |
1 files changed, 960 insertions, 0 deletions
diff --git a/JavaScriptCore/wtf/text/WTFString.cpp b/JavaScriptCore/wtf/text/WTFString.cpp new file mode 100644 index 0000000..a683e3d --- /dev/null +++ b/JavaScriptCore/wtf/text/WTFString.cpp @@ -0,0 +1,960 @@ +/* + * (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "WTFString.h" + +#include <limits> +#include <stdarg.h> +#include <wtf/ASCIICType.h> +#include <wtf/text/CString.h> +#include <wtf/StringExtras.h> +#include <wtf/Vector.h> +#include <wtf/dtoa.h> +#include <wtf/unicode/UTF8.h> +#include <wtf/unicode/Unicode.h> + +using namespace WTF; +using namespace WTF::Unicode; + +namespace WebCore { + +String::String(const UChar* str, unsigned len) +{ + if (!str) + return; + m_impl = StringImpl::create(str, len); +} + +String::String(const UChar* str) +{ + if (!str) + return; + + int len = 0; + while (str[len] != UChar(0)) + len++; + + m_impl = StringImpl::create(str, len); +} + +String::String(const char* str) +{ + if (!str) + return; + m_impl = StringImpl::create(str); +} + +String::String(const char* str, unsigned length) +{ + if (!str) + return; + m_impl = StringImpl::create(str, length); +} + +void String::append(const String& str) +{ + if (str.isEmpty()) + return; + + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (str.m_impl) { + if (m_impl) { + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(m_impl->length() + str.length(), data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar)); + m_impl = newImpl.release(); + } else + m_impl = str.m_impl; + } +} + +void String::append(char c) +{ + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (m_impl) { + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(m_impl->length() + 1, data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + data[m_impl->length()] = c; + m_impl = newImpl.release(); + } else + m_impl = StringImpl::create(&c, 1); +} + +void String::append(UChar c) +{ + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (m_impl) { + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(m_impl->length() + 1, data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + data[m_impl->length()] = c; + m_impl = newImpl.release(); + } else + m_impl = StringImpl::create(&c, 1); +} + +String operator+(const String& a, const String& b) +{ + if (a.isEmpty()) + return b; + if (b.isEmpty()) + return a; + String c = a; + c += b; + return c; +} + +String operator+(const String& s, const char* cs) +{ + return s + String(cs); +} + +String operator+(const char* cs, const String& s) +{ + return String(cs) + s; +} + +void String::insert(const String& str, unsigned pos) +{ + if (str.isEmpty()) { + if (str.isNull()) + return; + if (isNull()) + m_impl = str.impl(); + return; + } + insert(str.characters(), str.length(), pos); +} + +void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) +{ + if (!m_impl) { + if (!charactersToAppend) + return; + m_impl = StringImpl::create(charactersToAppend, lengthToAppend); + return; + } + + if (!lengthToAppend) + return; + + ASSERT(charactersToAppend); + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(length() + lengthToAppend, data); + memcpy(data, characters(), length() * sizeof(UChar)); + memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar)); + m_impl = newImpl.release(); +} + +void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position) +{ + if (position >= length()) { + append(charactersToInsert, lengthToInsert); + return; + } + + ASSERT(m_impl); + + if (!lengthToInsert) + return; + + ASSERT(charactersToInsert); + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(length() + lengthToInsert, data); + memcpy(data, characters(), position * sizeof(UChar)); + memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); + memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar)); + m_impl = newImpl.release(); +} + +UChar String::operator[](unsigned i) const +{ + if (!m_impl || i >= m_impl->length()) + return 0; + return m_impl->characters()[i]; +} + +UChar32 String::characterStartingAt(unsigned i) const +{ + if (!m_impl || i >= m_impl->length()) + return 0; + return m_impl->characterStartingAt(i); +} + +unsigned String::length() const +{ + if (!m_impl) + return 0; + return m_impl->length(); +} + +void String::truncate(unsigned position) +{ + if (position >= length()) + return; + UChar* data; + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data); + memcpy(data, characters(), position * sizeof(UChar)); + m_impl = newImpl.release(); +} + +void String::remove(unsigned position, int lengthToRemove) +{ + if (lengthToRemove <= 0) + return; + if (position >= length()) + return; + if (static_cast<unsigned>(lengthToRemove) > length() - position) + lengthToRemove = length() - position; + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(length() - lengthToRemove, data); + memcpy(data, characters(), position * sizeof(UChar)); + memcpy(data + position, characters() + position + lengthToRemove, + (length() - lengthToRemove - position) * sizeof(UChar)); + m_impl = newImpl.release(); +} + +String String::substring(unsigned pos, unsigned len) const +{ + if (!m_impl) + return String(); + return m_impl->substring(pos, len); +} + +String String::lower() const +{ + if (!m_impl) + return String(); + return m_impl->lower(); +} + +String String::upper() const +{ + if (!m_impl) + return String(); + return m_impl->upper(); +} + +String String::stripWhiteSpace() const +{ + if (!m_impl) + return String(); + return m_impl->stripWhiteSpace(); +} + +String String::simplifyWhiteSpace() const +{ + if (!m_impl) + return String(); + return m_impl->simplifyWhiteSpace(); +} + +String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const +{ + if (!m_impl) + return String(); + return m_impl->removeCharacters(findMatch); +} + +String String::foldCase() const +{ + if (!m_impl) + return String(); + return m_impl->foldCase(); +} + +bool String::percentage(int& result) const +{ + if (!m_impl || !m_impl->length()) + return false; + + if ((*m_impl)[m_impl->length() - 1] != '%') + return false; + + result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1); + return true; +} + +const UChar* String::characters() const +{ + if (!m_impl) + return 0; + return m_impl->characters(); +} + +const UChar* String::charactersWithNullTermination() +{ + if (!m_impl) + return 0; + if (m_impl->hasTerminatingNullCharacter()) + return m_impl->characters(); + m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl); + return m_impl->characters(); +} + +String String::format(const char *format, ...) +{ +#if PLATFORM(QT) + // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf. + // https://bugs.webkit.org/show_bug.cgi?id=18994 + va_list args; + va_start(args, format); + + QString buffer; + buffer.vsprintf(format, args); + + va_end(args); + + return buffer; + +#elif OS(WINCE) + va_list args; + va_start(args, format); + + Vector<char, 256> buffer; + + int bufferSize = 256; + buffer.resize(bufferSize); + for (;;) { + int written = vsnprintf(buffer.data(), bufferSize, format, args); + va_end(args); + + if (written == 0) + return String(""); + if (written > 0) + return StringImpl::create(buffer.data(), written); + + bufferSize <<= 1; + buffer.resize(bufferSize); + va_start(args, format); + } + +#else + va_list args; + va_start(args, format); + + Vector<char, 256> buffer; + + // Do the format once to get the length. +#if COMPILER(MSVC) + int result = _vscprintf(format, args); +#else + char ch; + int result = vsnprintf(&ch, 1, format, args); + // We need to call va_end() and then va_start() again here, as the + // contents of args is undefined after the call to vsnprintf + // according to http://man.cx/snprintf(3) + // + // Not calling va_end/va_start here happens to work on lots of + // systems, but fails e.g. on 64bit Linux. + va_end(args); + va_start(args, format); +#endif + + if (result == 0) + return String(""); + if (result < 0) + return String(); + unsigned len = result; + buffer.grow(len + 1); + + // Now do the formatting again, guaranteed to fit. + vsnprintf(buffer.data(), buffer.size(), format, args); + + va_end(args); + + return StringImpl::create(buffer.data(), len); +#endif +} + +String String::number(short n) +{ + return String::format("%hd", n); +} + +String String::number(unsigned short n) +{ + return String::format("%hu", n); +} + +String String::number(int n) +{ + return String::format("%d", n); +} + +String String::number(unsigned n) +{ + return String::format("%u", n); +} + +String String::number(long n) +{ + return String::format("%ld", n); +} + +String String::number(unsigned long n) +{ + return String::format("%lu", n); +} + +String String::number(long long n) +{ +#if OS(WINDOWS) && !PLATFORM(QT) + return String::format("%I64i", n); +#else + return String::format("%lli", n); +#endif +} + +String String::number(unsigned long long n) +{ +#if OS(WINDOWS) && !PLATFORM(QT) + return String::format("%I64u", n); +#else + return String::format("%llu", n); +#endif +} + +String String::number(double n) +{ + return String::format("%.6lg", n); +} + +int String::toIntStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntStrict(ok, base); +} + +unsigned String::toUIntStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUIntStrict(ok, base); +} + +int64_t String::toInt64Strict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt64Strict(ok, base); +} + +uint64_t String::toUInt64Strict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt64Strict(ok, base); +} + +intptr_t String::toIntPtrStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntPtrStrict(ok, base); +} + + +int String::toInt(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt(ok); +} + +unsigned String::toUInt(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt(ok); +} + +int64_t String::toInt64(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt64(ok); +} + +uint64_t String::toUInt64(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt64(ok); +} + +intptr_t String::toIntPtr(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntPtr(ok); +} + +double String::toDouble(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0.0; + } + return m_impl->toDouble(ok); +} + +float String::toFloat(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0.0f; + } + return m_impl->toFloat(ok); +} + +String String::threadsafeCopy() const +{ + if (!m_impl) + return String(); + return m_impl->threadsafeCopy(); +} + +String String::crossThreadString() const +{ + if (!m_impl) + return String(); + return m_impl->crossThreadString(); +} + +bool String::isEmpty() const +{ + return !m_impl || !m_impl->length(); +} + +void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const +{ + result.clear(); + + int startPos = 0; + int endPos; + while ((endPos = find(separator, startPos)) != -1) { + if (allowEmptyEntries || startPos != endPos) + result.append(substring(startPos, endPos - startPos)); + startPos = endPos + separator.length(); + } + if (allowEmptyEntries || startPos != static_cast<int>(length())) + result.append(substring(startPos)); +} + +void String::split(const String& separator, Vector<String>& result) const +{ + return split(separator, false, result); +} + +void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const +{ + result.clear(); + + int startPos = 0; + int endPos; + while ((endPos = find(separator, startPos)) != -1) { + if (allowEmptyEntries || startPos != endPos) + result.append(substring(startPos, endPos - startPos)); + startPos = endPos + 1; + } + if (allowEmptyEntries || startPos != static_cast<int>(length())) + result.append(substring(startPos)); +} + +void String::split(UChar separator, Vector<String>& result) const +{ + return split(String(&separator, 1), false, result); +} + +Vector<char> String::ascii() const +{ + if (m_impl) + return m_impl->ascii(); + + const char* nullMsg = "(null impl)"; + Vector<char, 2048> buffer; + for (int i = 0; nullMsg[i]; ++i) + buffer.append(nullMsg[i]); + + buffer.append('\0'); + return buffer; +} + +CString String::latin1() const +{ + // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are + // preserved, characters outside of this range are converted to '?'. + + unsigned length = this->length(); + const UChar* characters = this->characters(); + + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); + + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + characterBuffer[i] = ch > 255 ? '?' : ch; + } + + return result; +} + +// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available. +static inline void putUTF8Triple(char*& buffer, UChar ch) +{ + ASSERT(ch >= 0x0800); + *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); + *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); + *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); +} + +CString String::utf8() const +{ + unsigned length = this->length(); + const UChar* characters = this->characters(); + + // Allocate a buffer big enough to hold all the characters + // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). + // Optimization ideas, if we find this function is hot: + // * We could speculatively create a CStringBuffer to contain 'length' + // characters, and resize if necessary (i.e. if the buffer contains + // non-ascii characters). (Alternatively, scan the buffer first for + // ascii characters, so we know this will be sufficient). + // * We could allocate a CStringBuffer with an appropriate size to + // have a good chance of being able to write the string into the + // buffer without reallocing (say, 1.5 x length). + Vector<char, 1024> bufferVector(length * 3); + + char* buffer = bufferVector.data(); + ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), false); + ASSERT(result != sourceIllegal); // Only produced from strict conversion. + ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion + + // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate + // would have been handled in the middle of a string with non-strict conversion - which is to say, + // simply encode it to UTF-8. + if (result == sourceExhausted) { + // This should be one unpaired high surrogate. + ASSERT((characters + 1) == (characters + length)); + ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); + // There should be room left, since one UChar hasn't been converted. + ASSERT((buffer + 3) <= (buffer + bufferVector.size())); + putUTF8Triple(buffer, *characters); + } + + return CString(bufferVector.data(), buffer - bufferVector.data()); +} + +String String::fromUTF8(const char* stringStart, size_t length) +{ + if (!stringStart) + return String(); + + // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be + // the right length, if there are any multi-byte sequences this buffer will be too large. + UChar* buffer; + String stringBuffer(StringImpl::createUninitialized(length, buffer)); + UChar* bufferEnd = buffer + length; + + // Try converting into the buffer. + const char* stringCurrent = stringStart; + if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK) + return String(); + + // stringBuffer is full (the input must have been all ascii) so just return it! + if (buffer == bufferEnd) + return stringBuffer; + + // stringBuffer served its purpose as a buffer, copy the contents out into a new string. + unsigned utf16Length = buffer - stringBuffer.characters(); + ASSERT(utf16Length < length); + return String(stringBuffer.characters(), utf16Length); +} + +String String::fromUTF8(const char* string) +{ + if (!string) + return String(); + return fromUTF8(string, strlen(string)); +} + +String String::fromUTF8WithLatin1Fallback(const char* string, size_t size) +{ + String utf8 = fromUTF8(string, size); + if (!utf8) + return String(string, size); + return utf8; +} + +// String Operations + +static bool isCharacterAllowedInBase(UChar c, int base) +{ + if (c > 0x7F) + return false; + if (isASCIIDigit(c)) + return c - '0' < base; + if (isASCIIAlpha(c)) { + if (base > 36) + base = 36; + return (c >= 'a' && c < 'a' + base - 10) + || (c >= 'A' && c < 'A' + base - 10); + } + return false; +} + +template <typename IntegralType> +static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base) +{ + static const IntegralType integralMax = std::numeric_limits<IntegralType>::max(); + static const bool isSigned = std::numeric_limits<IntegralType>::is_signed; + const IntegralType maxMultiplier = integralMax / base; + + IntegralType value = 0; + bool isOk = false; + bool isNegative = false; + + if (!data) + goto bye; + + // skip leading whitespace + while (length && isSpaceOrNewline(*data)) { + length--; + data++; + } + + if (isSigned && length && *data == '-') { + length--; + data++; + isNegative = true; + } else if (length && *data == '+') { + length--; + data++; + } + + if (!length || !isCharacterAllowedInBase(*data, base)) + goto bye; + + while (length && isCharacterAllowedInBase(*data, base)) { + length--; + IntegralType digitValue; + UChar c = *data; + if (isASCIIDigit(c)) + digitValue = c - '0'; + else if (c >= 'a') + digitValue = c - 'a' + 10; + else + digitValue = c - 'A' + 10; + + if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) + goto bye; + + value = base * value + digitValue; + data++; + } + +#if COMPILER(MSVC) +#pragma warning(push, 0) +#pragma warning(disable:4146) +#endif + + if (isNegative) + value = -value; + +#if COMPILER(MSVC) +#pragma warning(pop) +#endif + + // skip trailing space + while (length && isSpaceOrNewline(*data)) { + length--; + data++; + } + + if (!length) + isOk = true; +bye: + if (ok) + *ok = isOk; + return isOk ? value : 0; +} + +static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length) +{ + size_t i = 0; + + // Allow leading spaces. + for (; i != length; ++i) { + if (!isSpaceOrNewline(data[i])) + break; + } + + // Allow sign. + if (i != length && (data[i] == '+' || data[i] == '-')) + ++i; + + // Allow digits. + for (; i != length; ++i) { + if (!isASCIIDigit(data[i])) + break; + } + + return i; +} + +int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<int>(data, length, ok, base); +} + +unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<unsigned>(data, length, ok, base); +} + +int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<int64_t>(data, length, ok, base); +} + +uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<uint64_t>(data, length, ok, base); +} + +intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<intptr_t>(data, length, ok, base); +} + +int charactersToInt(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +double charactersToDouble(const UChar* data, size_t length, bool* ok) +{ + if (!length) { + if (ok) + *ok = false; + return 0.0; + } + + Vector<char, 256> bytes(length + 1); + for (unsigned i = 0; i < length; ++i) + bytes[i] = data[i] < 0x7F ? data[i] : '?'; + bytes[length] = '\0'; + char* end; + double val = WTF::strtod(bytes.data(), &end); + if (ok) + *ok = (end == 0 || *end == '\0'); + return val; +} + +float charactersToFloat(const UChar* data, size_t length, bool* ok) +{ + // FIXME: This will return ok even when the string fits into a double but not a float. + return static_cast<float>(charactersToDouble(data, length, ok)); +} + +} // namespace WebCore + +#ifndef NDEBUG +// For use in the debugger - leaks memory +WebCore::String* string(const char*); + +WebCore::String* string(const char* s) +{ + return new WebCore::String(s); +} +#endif |