summaryrefslogtreecommitdiffstats
path: root/WebCore/platform/text/String.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/platform/text/String.cpp')
-rw-r--r--WebCore/platform/text/String.cpp799
1 files changed, 799 insertions, 0 deletions
diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp
new file mode 100644
index 0000000..ee4cef4
--- /dev/null
+++ b/WebCore/platform/text/String.cpp
@@ -0,0 +1,799 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+#include "CString.h"
+#include "FloatConversion.h"
+#include "StringBuffer.h"
+#include "TextEncoding.h"
+#include <kjs/dtoa.h>
+#include <kjs/identifier.h>
+#include <limits>
+#include <stdarg.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/StringExtras.h>
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+using KJS::Identifier;
+using KJS::UString;
+
+using namespace WTF;
+
+namespace WebCore {
+
+String::String(const UChar* str, unsigned len)
+{
+ if (!str)
+ return;
+ m_impl = StringImpl::create(str, len);
+}
+
+String::String(const UChar* str)
+{
+ if (!str)
+ return;
+
+ int len = 0;
+ while (str[len] != UChar(0))
+ len++;
+
+ m_impl = StringImpl::create(str, len);
+}
+
+String::String(const char* str)
+{
+ if (!str)
+ return;
+ m_impl = StringImpl::create(str);
+}
+
+String::String(const char* str, unsigned length)
+{
+ if (!str)
+ return;
+ m_impl = StringImpl::create(str, length);
+}
+
+void String::append(const String& str)
+{
+ // FIXME: This is extremely inefficient. So much so that we might want to take this
+ // out of String's API. We can make it better by optimizing the case where exactly
+ // one String is pointing at this StringImpl, but even then it's going to require a
+ // call to fastMalloc every single time.
+ if (str.m_impl) {
+ if (m_impl) {
+ StringBuffer buffer(m_impl->length() + str.length());
+ memcpy(buffer.characters(), m_impl->characters(), m_impl->length() * sizeof(UChar));
+ memcpy(buffer.characters() + m_impl->length(), str.characters(), str.length() * sizeof(UChar));
+ m_impl = StringImpl::adopt(buffer);
+ } else
+ m_impl = str.m_impl;
+ }
+}
+
+void String::append(char c)
+{
+ // FIXME: This is extremely inefficient. So much so that we might want to take this
+ // out of String's API. We can make it better by optimizing the case where exactly
+ // one String is pointing at this StringImpl, but even then it's going to require a
+ // call to fastMalloc every single time.
+ if (m_impl) {
+ StringBuffer buffer(m_impl->length() + 1);
+ memcpy(buffer.characters(), m_impl->characters(), m_impl->length() * sizeof(UChar));
+ buffer[m_impl->length()] = c;
+ m_impl = StringImpl::adopt(buffer);
+ } else
+ m_impl = StringImpl::create(&c, 1);
+}
+
+void String::append(UChar c)
+{
+ // FIXME: This is extremely inefficient. So much so that we might want to take this
+ // out of String's API. We can make it better by optimizing the case where exactly
+ // one String is pointing at this StringImpl, but even then it's going to require a
+ // call to fastMalloc every single time.
+ if (m_impl) {
+ StringBuffer buffer(m_impl->length() + 1);
+ memcpy(buffer.characters(), m_impl->characters(), m_impl->length() * sizeof(UChar));
+ buffer[m_impl->length()] = c;
+ m_impl = StringImpl::adopt(buffer);
+ } else
+ m_impl = StringImpl::create(&c, 1);
+}
+
+String operator+(const String& a, const String& b)
+{
+ if (a.isEmpty())
+ return b;
+ if (b.isEmpty())
+ return a;
+ String c = a;
+ c += b;
+ return c;
+}
+
+String operator+(const String& s, const char* cs)
+{
+ return s + String(cs);
+}
+
+String operator+(const char* cs, const String& s)
+{
+ return String(cs) + s;
+}
+
+void String::insert(const String& str, unsigned pos)
+{
+ if (str.isEmpty()) {
+ if (str.isNull())
+ return;
+ if (isNull())
+ m_impl = str.impl();
+ return;
+ }
+ insert(str.characters(), str.length(), pos);
+}
+
+void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
+{
+ if (!m_impl) {
+ if (!charactersToAppend)
+ return;
+ m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
+ return;
+ }
+
+ if (!lengthToAppend)
+ return;
+
+ ASSERT(charactersToAppend);
+ StringBuffer buffer(length() + lengthToAppend);
+ memcpy(buffer.characters(), characters(), length() * sizeof(UChar));
+ memcpy(buffer.characters() + length(), charactersToAppend, lengthToAppend * sizeof(UChar));
+ m_impl = StringImpl::adopt(buffer);
+}
+
+void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
+{
+ if (position >= length()) {
+ append(charactersToInsert, lengthToInsert);
+ return;
+ }
+
+ ASSERT(m_impl);
+
+ if (!lengthToInsert)
+ return;
+
+ ASSERT(charactersToInsert);
+ StringBuffer buffer(length() + lengthToInsert);
+ memcpy(buffer.characters(), characters(), position * sizeof(UChar));
+ memcpy(buffer.characters() + position, charactersToInsert, lengthToInsert * sizeof(UChar));
+ memcpy(buffer.characters() + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
+ m_impl = StringImpl::adopt(buffer);
+}
+
+UChar String::operator[](unsigned i) const
+{
+ if (!m_impl || i >= m_impl->length())
+ return 0;
+ return m_impl->characters()[i];
+}
+
+UChar32 String::characterStartingAt(unsigned i) const
+{
+ if (!m_impl || i >= m_impl->length())
+ return 0;
+ return m_impl->characterStartingAt(i);
+}
+
+unsigned String::length() const
+{
+ if (!m_impl)
+ return 0;
+ return m_impl->length();
+}
+
+void String::truncate(unsigned position)
+{
+ if (position >= length())
+ return;
+ StringBuffer buffer(position);
+ memcpy(buffer.characters(), characters(), position * sizeof(UChar));
+ m_impl = StringImpl::adopt(buffer);
+}
+
+void String::remove(unsigned position, int lengthToRemove)
+{
+ if (lengthToRemove <= 0)
+ return;
+ if (position >= length())
+ return;
+ if (static_cast<unsigned>(lengthToRemove) > length() - position)
+ lengthToRemove = length() - position;
+ StringBuffer buffer(length() - lengthToRemove);
+ memcpy(buffer.characters(), characters(), position * sizeof(UChar));
+ memcpy(buffer.characters() + position, characters() + position + lengthToRemove,
+ (length() - lengthToRemove - position) * sizeof(UChar));
+ m_impl = StringImpl::adopt(buffer);
+}
+
+String String::substring(unsigned pos, unsigned len) const
+{
+ if (!m_impl)
+ return String();
+ return m_impl->substring(pos, len);
+}
+
+String String::lower() const
+{
+ if (!m_impl)
+ return String();
+ return m_impl->lower();
+}
+
+String String::upper() const
+{
+ if (!m_impl)
+ return String();
+ return m_impl->upper();
+}
+
+String String::stripWhiteSpace() const
+{
+ if (!m_impl)
+ return String();
+ return m_impl->stripWhiteSpace();
+}
+
+String String::simplifyWhiteSpace() const
+{
+ if (!m_impl)
+ return String();
+ return m_impl->simplifyWhiteSpace();
+}
+
+String String::foldCase() const
+{
+ if (!m_impl)
+ return String();
+ return m_impl->foldCase();
+}
+
+bool String::percentage(int& result) const
+{
+ if (!m_impl || !m_impl->length())
+ return false;
+
+ if ((*m_impl)[m_impl->length() - 1] != '%')
+ return false;
+
+ result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);
+ return true;
+}
+
+const UChar* String::characters() const
+{
+ if (!m_impl)
+ return 0;
+ return m_impl->characters();
+}
+
+const UChar* String::charactersWithNullTermination()
+{
+ if (!m_impl)
+ return 0;
+ if (m_impl->hasTerminatingNullCharacter())
+ return m_impl->characters();
+ m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
+ return m_impl->characters();
+}
+
+String String::format(const char *format, ...)
+{
+ va_list args;
+ va_start(args, format);
+
+ Vector<char, 256> buffer;
+
+ // Do the format once to get the length.
+#if COMPILER(MSVC)
+ int result = _vscprintf(format, args);
+#else
+ char ch;
+ int result = vsnprintf(&ch, 1, format, args);
+ // We need to call va_end() and then va_start() again here, as the
+ // contents of args is undefined after the call to vsnprintf
+ // according to http://man.cx/snprintf(3)
+ //
+ // Not calling va_end/va_start here happens to work on lots of
+ // systems, but fails e.g. on 64bit Linux.
+ va_end(args);
+ va_start(args, format);
+#endif
+
+ if (result == 0)
+ return String("");
+ if (result < 0)
+ return String();
+ unsigned len = result;
+ buffer.grow(len + 1);
+
+ // Now do the formatting again, guaranteed to fit.
+ vsnprintf(buffer.data(), buffer.size(), format, args);
+
+ va_end(args);
+
+ return StringImpl::create(buffer.data(), len);
+}
+
+String String::number(int n)
+{
+ return String::format("%d", n);
+}
+
+String String::number(unsigned n)
+{
+ return String::format("%u", n);
+}
+
+String String::number(long n)
+{
+ return String::format("%ld", n);
+}
+
+String String::number(unsigned long n)
+{
+ return String::format("%lu", n);
+}
+
+String String::number(long long n)
+{
+#if PLATFORM(WIN_OS)
+ return String::format("%I64i", n);
+#else
+ return String::format("%lli", n);
+#endif
+}
+
+String String::number(unsigned long long n)
+{
+#if PLATFORM(WIN_OS)
+ return String::format("%I64u", n);
+#else
+ return String::format("%llu", n);
+#endif
+}
+
+String String::number(double n)
+{
+ return String::format("%.6lg", n);
+}
+
+int String::toIntStrict(bool* ok, int base) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0;
+ }
+ return m_impl->toIntStrict(ok, base);
+}
+
+unsigned String::toUIntStrict(bool* ok, int base) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0;
+ }
+ return m_impl->toUIntStrict(ok, base);
+}
+
+int64_t String::toInt64Strict(bool* ok, int base) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0;
+ }
+ return m_impl->toInt64Strict(ok, base);
+}
+
+uint64_t String::toUInt64Strict(bool* ok, int base) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0;
+ }
+ return m_impl->toUInt64Strict(ok, base);
+}
+
+int String::toInt(bool* ok) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0;
+ }
+ return m_impl->toInt(ok);
+}
+
+unsigned String::toUInt(bool* ok) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0;
+ }
+ return m_impl->toUInt(ok);
+}
+
+int64_t String::toInt64(bool* ok) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0;
+ }
+ return m_impl->toInt64(ok);
+}
+
+uint64_t String::toUInt64(bool* ok) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0;
+ }
+ return m_impl->toUInt64(ok);
+}
+
+double String::toDouble(bool* ok) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0.0;
+ }
+ return m_impl->toDouble(ok);
+}
+
+float String::toFloat(bool* ok) const
+{
+ if (!m_impl) {
+ if (ok)
+ *ok = false;
+ return 0.0f;
+ }
+ return m_impl->toFloat(ok);
+}
+
+String String::copy() const
+{
+ if (!m_impl)
+ return String();
+ return m_impl->copy();
+}
+
+bool String::isEmpty() const
+{
+ return !m_impl || !m_impl->length();
+}
+
+Length* String::toCoordsArray(int& len) const
+{
+ return m_impl ? m_impl->toCoordsArray(len) : 0;
+}
+
+Length* String::toLengthArray(int& len) const
+{
+ return m_impl ? m_impl->toLengthArray(len) : 0;
+}
+
+void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
+{
+ result.clear();
+
+ int startPos = 0;
+ int endPos;
+ while ((endPos = find(separator, startPos)) != -1) {
+ if (allowEmptyEntries || startPos != endPos)
+ result.append(substring(startPos, endPos - startPos));
+ startPos = endPos + separator.length();
+ }
+ if (allowEmptyEntries || startPos != static_cast<int>(length()))
+ result.append(substring(startPos));
+}
+
+void String::split(const String& separator, Vector<String>& result) const
+{
+ return split(separator, false, result);
+}
+
+void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
+{
+ result.clear();
+
+ int startPos = 0;
+ int endPos;
+ while ((endPos = find(separator, startPos)) != -1) {
+ if (allowEmptyEntries || startPos != endPos)
+ result.append(substring(startPos, endPos - startPos));
+ startPos = endPos + 1;
+ }
+ if (allowEmptyEntries || startPos != static_cast<int>(length()))
+ result.append(substring(startPos));
+}
+
+void String::split(UChar separator, Vector<String>& result) const
+{
+ return split(String(&separator, 1), false, result);
+}
+
+#ifndef NDEBUG
+Vector<char> String::ascii() const
+{
+ if (m_impl)
+ return m_impl->ascii();
+
+ const char* nullMsg = "(null impl)";
+ Vector<char, 2048> buffer;
+ for (int i = 0; nullMsg[i]; ++i)
+ buffer.append(nullMsg[i]);
+
+ buffer.append('\0');
+ return buffer;
+}
+#endif
+
+CString String::latin1() const
+{
+ return Latin1Encoding().encode(characters(), length());
+}
+
+CString String::utf8() const
+{
+ return UTF8Encoding().encode(characters(), length());
+}
+
+String String::fromUTF8(const char* string, size_t size)
+{
+ return UTF8Encoding().decode(string, size);
+}
+
+String String::fromUTF8(const char* string)
+{
+ return UTF8Encoding().decode(string, strlen(string));
+}
+
+String::String(const Identifier& str)
+{
+ if (str.isNull())
+ return;
+ m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.data()), str.size());
+}
+
+String::String(const UString& str)
+{
+ if (str.isNull())
+ return;
+ m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.data()), str.size());
+}
+
+String::operator Identifier() const
+{
+ if (!m_impl)
+ return Identifier();
+ return Identifier(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length());
+}
+
+String::operator UString() const
+{
+ if (!m_impl)
+ return UString();
+ return UString(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length());
+}
+
+// String Operations
+
+static bool isCharacterAllowedInBase(UChar c, int base)
+{
+ if (c > 0x7F)
+ return false;
+ if (isASCIIDigit(c))
+ return c - '0' < base;
+ if (isASCIIAlpha(c)) {
+ if (base > 36)
+ base = 36;
+ return (c >= 'a' && c < 'a' + base - 10)
+ || (c >= 'A' && c < 'A' + base - 10);
+ }
+ return false;
+}
+
+template <typename IntegralType>
+static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base)
+{
+ static const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
+ static const bool isSigned = std::numeric_limits<IntegralType>::is_signed;
+ const IntegralType maxMultiplier = integralMax / base;
+
+ IntegralType value = 0;
+ bool isOk = false;
+ bool isNegative = false;
+
+ if (!data)
+ goto bye;
+
+ // skip leading whitespace
+ while (length && isSpaceOrNewline(*data)) {
+ length--;
+ data++;
+ }
+
+ if (isSigned && length && *data == '-') {
+ length--;
+ data++;
+ isNegative = true;
+ } else if (length && *data == '+') {
+ length--;
+ data++;
+ }
+
+ if (!length || !isCharacterAllowedInBase(*data, base))
+ goto bye;
+
+ while (length && isCharacterAllowedInBase(*data, base)) {
+ length--;
+ IntegralType digitValue;
+ UChar c = *data;
+ if (isASCIIDigit(c))
+ digitValue = c - '0';
+ else if (c >= 'a')
+ digitValue = c - 'a' + 10;
+ else
+ digitValue = c - 'A' + 10;
+
+ if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
+ goto bye;
+
+ value = base * value + digitValue;
+ data++;
+ }
+
+ if (isNegative)
+ value = -value;
+
+ // skip trailing space
+ while (length && isSpaceOrNewline(*data)) {
+ length--;
+ data++;
+ }
+
+ if (!length)
+ isOk = true;
+bye:
+ if (ok)
+ *ok = isOk;
+ return isOk ? value : 0;
+}
+
+static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
+{
+ size_t i = 0;
+
+ // Allow leading spaces.
+ for (; i != length; ++i) {
+ if (!isSpaceOrNewline(data[i]))
+ break;
+ }
+
+ // Allow sign.
+ if (i != length && (data[i] == '+' || data[i] == '-'))
+ ++i;
+
+ // Allow digits.
+ for (; i != length; ++i) {
+ if (!Unicode::isDigit(data[i]))
+ break;
+ }
+
+ return i;
+}
+
+int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<int>(data, length, ok, base);
+}
+
+unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<unsigned>(data, length, ok, base);
+}
+
+int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<int64_t>(data, length, ok, base);
+}
+
+uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<uint64_t>(data, length, ok, base);
+}
+
+int charactersToInt(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+double charactersToDouble(const UChar* data, size_t length, bool* ok)
+{
+ if (!length) {
+ if (ok)
+ *ok = false;
+ return 0.0;
+ }
+
+ Vector<char, 256> bytes(length + 1);
+ for (unsigned i = 0; i < length; ++i)
+ bytes[i] = data[i] < 0x7F ? data[i] : '?';
+ bytes[length] = '\0';
+ char* end;
+ double val = kjs_strtod(bytes.data(), &end);
+ if (ok)
+ *ok = (end == 0 || *end == '\0');
+ return val;
+}
+
+float charactersToFloat(const UChar* data, size_t length, bool* ok)
+{
+ // FIXME: This will return ok even when the string fits into a double but not a float.
+ return narrowPrecisionToFloat(charactersToDouble(data, length, ok));
+}
+
+} // namespace WebCore
+
+#ifndef NDEBUG
+// For debugging only -- leaks memory
+WebCore::String* string(const char* s)
+{
+ return new WebCore::String(s);
+}
+#endif