summary refs log tree commit diff stats
path: root/WebCore/platform/text
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/platform/text')
-rw-r--r-- WebCore/platform/text/AtomicString.cpp 216
-rw-r--r-- WebCore/platform/text/AtomicString.h 66
-rw-r--r-- WebCore/platform/text/AtomicStringHash.h 64
-rw-r--r-- WebCore/platform/text/BidiResolver.h 400
-rw-r--r-- WebCore/platform/text/CString.cpp 34
-rw-r--r-- WebCore/platform/text/CString.h 24
-rw-r--r-- WebCore/platform/text/CharacterNames.h 3
-rw-r--r-- WebCore/platform/text/ParserUtilities.h 54
-rw-r--r-- WebCore/platform/text/PlatformString.h 150
-rw-r--r-- WebCore/platform/text/RegularExpression.cpp 136
-rw-r--r-- WebCore/platform/text/RegularExpression.h 30
-rw-r--r-- WebCore/platform/text/SegmentedString.cpp 44
-rw-r--r-- WebCore/platform/text/SegmentedString.h 32
-rw-r--r-- WebCore/platform/text/String.cpp 369
-rw-r--r-- WebCore/platform/text/StringBuilder.cpp 97
-rw-r--r-- WebCore/platform/text/StringBuilder.h 57
-rw-r--r-- WebCore/platform/text/StringHash.h 91
-rw-r--r-- WebCore/platform/text/StringImpl.cpp 317
-rw-r--r-- WebCore/platform/text/StringImpl.h 33
-rw-r--r-- WebCore/platform/text/TextBreakIteratorICU.cpp 2
-rw-r--r-- WebCore/platform/text/TextCodec.cpp 30
-rw-r--r-- WebCore/platform/text/TextCodec.h 39
-rw-r--r-- WebCore/platform/text/TextCodecICU.cpp 223
-rw-r--r-- WebCore/platform/text/TextCodecICU.h 11
-rw-r--r-- WebCore/platform/text/TextCodecLatin1.cpp 27
-rw-r--r-- WebCore/platform/text/TextCodecLatin1.h 4
-rw-r--r-- WebCore/platform/text/TextCodecUTF16.cpp 15
-rw-r--r-- WebCore/platform/text/TextCodecUTF16.h 4
-rw-r--r-- WebCore/platform/text/TextCodecUserDefined.cpp 27
-rw-r--r-- WebCore/platform/text/TextCodecUserDefined.h 4
-rw-r--r-- WebCore/platform/text/TextDecoder.cpp 47
-rw-r--r-- WebCore/platform/text/TextDecoder.h 8
-rw-r--r-- WebCore/platform/text/TextEncoding.cpp 10
-rw-r--r-- WebCore/platform/text/TextEncoding.h 10
-rw-r--r-- WebCore/platform/text/TextEncodingRegistry.cpp 3
-rw-r--r-- WebCore/platform/text/TextStream.cpp 122
-rw-r--r-- WebCore/platform/text/TextStream.h 34
-rw-r--r-- WebCore/platform/text/cf/StringCF.cpp 4
-rw-r--r-- WebCore/platform/text/cf/StringImplCF.cpp 4
-rw-r--r-- WebCore/platform/text/mac/ShapeArabic.c 29
-rw-r--r-- WebCore/platform/text/mac/ShapeArabic.h 3
-rw-r--r-- WebCore/platform/text/mac/TextCodecMac.cpp 30
-rw-r--r-- WebCore/platform/text/mac/TextCodecMac.h 5
-rw-r--r-- WebCore/platform/text/mac/mac-encodings.txt 4
-rw-r--r-- WebCore/platform/text/qt/StringQt.cpp 10
-rw-r--r-- WebCore/platform/text/qt/TextCodecQt.cpp 8
-rw-r--r-- WebCore/platform/text/qt/TextCodecQt.h 6
-rw-r--r-- WebCore/platform/text/wx/StringWx.cpp 7
48 files changed, 1868 insertions, 1079 deletions
diff --git a/WebCore/platform/text/AtomicString.cpp b/WebCore/platform/text/AtomicString.cpp
index c584e6c..dc573e1 100644
--- a/WebCore/platform/text/AtomicString.cpp
+++ b/WebCore/platform/text/AtomicString.cpp
@@ -20,27 +20,46 @@
#include "config.h"
-#ifdef AVOID_STATIC_CONSTRUCTORS
+#ifdef SKIP_STATIC_CONSTRUCTORS_ON_GCC
#define ATOMICSTRING_HIDE_GLOBALS 1
#endif
#include "AtomicString.h"
-#include "DeprecatedString.h"
#include "StaticConstructors.h"
#include "StringHash.h"
#include <kjs/identifier.h>
+#include <wtf/Threading.h>
#include <wtf/HashSet.h>
-using KJS::Identifier;
-using KJS::UString;
+#if ENABLE(WORKERS)
+#include <wtf/ThreadSpecific.h>
+using namespace WTF;
+#endif
+
+#if USE(JSC)
+using JSC::Identifier;
+using JSC::UString;
+#endif
namespace WebCore {
-static HashSet<StringImpl*>* stringTable;
+#if ENABLE(WORKERS)
+static ThreadSpecific<HashSet<StringImpl*> >* staticStringTable;
+#else
+static HashSet<StringImpl*>* staticStringTable;
+#endif
-struct CStringTranslator
+static inline HashSet<StringImpl*>* stringTable()
{
+#if ENABLE(WORKERS)
+ return *staticStringTable;
+#else
+ return staticStringTable;
+#endif
+}
+
+struct CStringTranslator {
static unsigned hash(const char* c)
{
return StringImpl::computeHash(c);
@@ -74,13 +93,16 @@ bool operator==(const AtomicString& a, const char* b)
return CStringTranslator::equal(impl, b);
}
-StringImpl* AtomicString::add(const char* c)
+PassRefPtr<StringImpl> AtomicString::add(const char* c)
{
if (!c)
return 0;
if (!*c)
return StringImpl::empty();
- return *stringTable->add<const char*, CStringTranslator>(c).first;
+ pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<const char*, CStringTranslator>(c);
+ if (!addResult.second)
+ return *addResult.first;
+ return adoptRef(*addResult.first);
}
struct UCharBuffer {
@@ -88,6 +110,37 @@ struct UCharBuffer {
unsigned length;
};
+static inline bool equal(StringImpl* string, const UChar* characters, unsigned length)
+{
+ if (string->length() != length)
+ return false;
+
+#if PLATFORM(ARM)
+ const UChar* stringCharacters = string->characters();
+ for (unsigned i = 0; i != length; ++i) {
+ if (*stringCharacters++ != *characters++)
+ return false;
+ }
+ return true;
+#else
+ /* Do it 4-bytes-at-a-time on architectures where it's safe */
+
+ const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters());
+ const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters);
+
+ unsigned halfLength = length >> 1;
+ for (unsigned i = 0; i != halfLength; ++i) {
+ if (*stringCharacters++ != *bufferCharacters++)
+ return false;
+ }
+
+ if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters))
+ return false;
+
+ return true;
+#endif
+}
+
struct UCharBufferTranslator {
static unsigned hash(const UCharBuffer& buf)
{
@@ -96,37 +149,7 @@ struct UCharBufferTranslator {
static bool equal(StringImpl* const& str, const UCharBuffer& buf)
{
- unsigned strLength = str->length();
- unsigned bufLength = buf.length;
- if (strLength != bufLength)
- return false;
-
-#if PLATFORM(ARM)
- const UChar* strChars = str->characters();
- const UChar* bufChars = buf.s;
-
- for (unsigned i = 0; i != strLength; ++i) {
- if (*strChars++ != *bufChars++)
- return false;
- }
- return true;
-#else
- /* Do it 4-bytes-at-a-time on architectures where it's safe */
- const uint32_t* strChars = reinterpret_cast<const uint32_t*>(str->characters());
- const uint32_t* bufChars = reinterpret_cast<const uint32_t*>(buf.s);
-
- unsigned halfLength = strLength >> 1;
- for (unsigned i = 0; i != halfLength; ++i) {
- if (*strChars++ != *bufChars++)
- return false;
- }
-
- if (strLength & 1 &&
- *reinterpret_cast<const uint16_t *>(strChars) != *reinterpret_cast<const uint16_t *>(bufChars))
- return false;
-
- return true;
-#endif
+ return WebCore::equal(str, buf.s, buf.length);
}
static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
@@ -135,7 +158,31 @@ struct UCharBufferTranslator {
}
};
-StringImpl* AtomicString::add(const UChar* s, int length)
+struct HashAndCharacters {
+ unsigned hash;
+ const UChar* characters;
+ unsigned length;
+};
+
+struct HashAndCharactersTranslator {
+ static unsigned hash(const HashAndCharacters& buffer)
+ {
+ ASSERT(buffer.hash == StringImpl::computeHash(buffer.characters, buffer.length));
+ return buffer.hash;
+ }
+
+ static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
+ {
+ return WebCore::equal(string, buffer.characters, buffer.length);
+ }
+
+ static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
+ {
+ location = new StringImpl(buffer.characters, buffer.length, hash);
+ }
+};
+
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s, int length)
{
if (!s)
return 0;
@@ -143,11 +190,14 @@ StringImpl* AtomicString::add(const UChar* s, int length)
if (length == 0)
return StringImpl::empty();
- UCharBuffer buf = {s, length};
- return *stringTable->add<UCharBuffer, UCharBufferTranslator>(buf).first;
+ UCharBuffer buf = { s, length };
+ pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<UCharBuffer, UCharBufferTranslator>(buf);
+ if (!addResult.second)
+ return *addResult.first;
+ return adoptRef(*addResult.first);
}
-StringImpl* AtomicString::add(const UChar* s)
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
{
if (!s)
return 0;
@@ -160,10 +210,13 @@ StringImpl* AtomicString::add(const UChar* s)
return StringImpl::empty();
UCharBuffer buf = {s, length};
- return *stringTable->add<UCharBuffer, UCharBufferTranslator>(buf).first;
+ pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<UCharBuffer, UCharBufferTranslator>(buf);
+ if (!addResult.second)
+ return *addResult.first;
+ return adoptRef(*addResult.first);
}
-StringImpl* AtomicString::add(StringImpl* r)
+PassRefPtr<StringImpl> AtomicString::add(StringImpl* r)
{
if (!r || r->m_inTable)
return r;
@@ -171,7 +224,7 @@ StringImpl* AtomicString::add(StringImpl* r)
if (r->length() == 0)
return StringImpl::empty();
- StringImpl* result = *stringTable->add(r).first;
+ StringImpl* result = *stringTable()->add(r).first;
if (result == r)
r->m_inTable = true;
return result;
@@ -179,37 +232,65 @@ StringImpl* AtomicString::add(StringImpl* r)
void AtomicString::remove(StringImpl* r)
{
- stringTable->remove(r);
+ stringTable()->remove(r);
}
-StringImpl* AtomicString::add(const KJS::Identifier& str)
+#if USE(JSC)
+PassRefPtr<StringImpl> AtomicString::add(const JSC::Identifier& identifier)
{
- return add(reinterpret_cast<const UChar*>(str.data()), str.size());
-}
+ if (identifier.isNull())
+ return 0;
-StringImpl* AtomicString::add(const KJS::UString& str)
-{
- return add(reinterpret_cast<const UChar*>(str.data()), str.size());
-}
+ UString::Rep* string = identifier.ustring().rep();
+ unsigned length = string->size();
+ if (!length)
+ return StringImpl::empty();
-AtomicString::operator Identifier() const
-{
- return m_string;
+ HashAndCharacters buffer = { string->computedHash(), string->data(), length };
+ pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<HashAndCharacters, HashAndCharactersTranslator>(buffer);
+ if (!addResult.second)
+ return *addResult.first;
+ return adoptRef(*addResult.first);
}
-AtomicString::operator UString() const
+PassRefPtr<StringImpl> AtomicString::add(const JSC::UString& ustring)
{
- return m_string;
+ if (ustring.isNull())
+ return 0;
+
+ UString::Rep* string = ustring.rep();
+ unsigned length = string->size();
+ if (!length)
+ return StringImpl::empty();
+
+ HashAndCharacters buffer = { string->hash(), string->data(), length };
+ pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<HashAndCharacters, HashAndCharactersTranslator>(buffer);
+ if (!addResult.second)
+ return *addResult.first;
+ return adoptRef(*addResult.first);
}
+#endif
-AtomicString::AtomicString(const DeprecatedString& s)
- : m_string(add(reinterpret_cast<const UChar*>(s.unicode()), s.length()))
+AtomicStringImpl* AtomicString::find(const JSC::Identifier& identifier)
{
+ if (identifier.isNull())
+ return 0;
+
+ UString::Rep* string = identifier.ustring().rep();
+ unsigned length = string->size();
+ if (!length)
+ return static_cast<AtomicStringImpl*>(StringImpl::empty());
+
+ HashAndCharacters buffer = { string->computedHash(), string->data(), length };
+ HashSet<StringImpl*>::iterator iterator = stringTable()->find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
+ if (iterator == stringTable()->end())
+ return 0;
+ return static_cast<AtomicStringImpl*>(*iterator);
}
-DeprecatedString AtomicString::deprecatedString() const
+AtomicString::operator UString() const
{
- return m_string.deprecatedString();
+ return m_string;
}
DEFINE_GLOBAL(AtomicString, nullAtom)
@@ -222,7 +303,14 @@ void AtomicString::init()
{
static bool initialized;
if (!initialized) {
- stringTable = new HashSet<StringImpl*>;
+ // Initialization is not thread safe, so this function must be called from the main thread first.
+ ASSERT(isMainThread());
+
+#if ENABLE(WORKERS)
+ staticStringTable = new ThreadSpecific<HashSet<StringImpl*> >;
+#else
+ staticStringTable = new HashSet<StringImpl*>;
+#endif
// Use placement new to initialize the globals.
new ((void*)&nullAtom) AtomicString;
diff --git a/WebCore/platform/text/AtomicString.h b/WebCore/platform/text/AtomicString.h
index 4a0bb5b..ad034d9 100644
--- a/WebCore/platform/text/AtomicString.h
+++ b/WebCore/platform/text/AtomicString.h
@@ -1,7 +1,5 @@
/*
- * This file is part of the DOM implementation for KDE.
- *
- * Copyright (C) 2004, 2005, 2006 Apple Computer, Inc.
+ * Copyright (C) 2004, 2005, 2006, 2008 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -28,6 +26,8 @@
namespace WebCore {
+struct AtomicStringHash;
+
class AtomicString {
public:
static void init();
@@ -36,17 +36,28 @@ public:
AtomicString(const char* s) : m_string(add(s)) { }
AtomicString(const UChar* s, int length) : m_string(add(s, length)) { }
AtomicString(const UChar* s) : m_string(add(s)) { }
- AtomicString(const KJS::UString& s) : m_string(add(s)) { }
- AtomicString(const KJS::Identifier& s) : m_string(add(s)) { }
+#if USE(JSC)
+ AtomicString(const JSC::UString& s) : m_string(add(s)) { }
+ AtomicString(const JSC::Identifier& s) : m_string(add(s)) { }
+#endif
AtomicString(StringImpl* imp) : m_string(add(imp)) { }
AtomicString(AtomicStringImpl* imp) : m_string(imp) { }
AtomicString(const String& s) : m_string(add(s.impl())) { }
+ // Hash table deleted values, which are only constructed and never copied or destroyed.
+ AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
+ bool isHashTableDeletedValue() const { return m_string.isHashTableDeletedValue(); }
+
+#if USE(JSC)
+ static AtomicStringImpl* find(const JSC::Identifier&);
+#endif
+
operator const String&() const { return m_string; }
- const String& domString() const { return m_string; };
+ const String& string() const { return m_string; };
- operator KJS::Identifier() const;
- operator KJS::UString() const;
+#if USE(JSC)
+ operator JSC::UString() const;
+#endif
AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl *>(m_string.impl()); }
@@ -57,23 +68,21 @@ public:
bool contains(UChar c) const { return m_string.contains(c); }
bool contains(const AtomicString& s, bool caseSensitive = true) const
- { return m_string.contains(s.domString(), caseSensitive); }
+ { return m_string.contains(s.string(), caseSensitive); }
int find(UChar c, int start = 0) const { return m_string.find(c, start); }
int find(const AtomicString& s, int start = 0, bool caseSentitive = true) const
- { return m_string.find(s.domString(), start, caseSentitive); }
+ { return m_string.find(s.string(), start, caseSentitive); }
bool startsWith(const AtomicString& s, bool caseSensitive = true) const
- { return m_string.startsWith(s.domString(), caseSensitive); }
+ { return m_string.startsWith(s.string(), caseSensitive); }
bool endsWith(const AtomicString& s, bool caseSensitive = true) const
- { return m_string.endsWith(s.domString(), caseSensitive); }
+ { return m_string.endsWith(s.string(), caseSensitive); }
int toInt(bool* ok = 0) const { return m_string.toInt(ok); }
double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); }
float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); }
bool percentage(int& p) const { return m_string.percentage(p); }
- Length* toLengthArray(int& len) const { return m_string.toLengthArray(len); }
- Length* toCoordsArray(int& len) const { return m_string.toCoordsArray(len); }
bool isNull() const { return m_string.isNull(); }
bool isEmpty() const { return m_string.isEmpty(); }
@@ -93,18 +102,17 @@ public:
operator QString() const { return m_string; }
#endif
- AtomicString(const DeprecatedString&);
- DeprecatedString deprecatedString() const;
-
private:
String m_string;
- static StringImpl* add(const char*);
- static StringImpl* add(const UChar*, int length);
- static StringImpl* add(const UChar*);
- static StringImpl* add(StringImpl*);
- static StringImpl* add(const KJS::UString&);
- static StringImpl* add(const KJS::Identifier&);
+ static PassRefPtr<StringImpl> add(const char*);
+ static PassRefPtr<StringImpl> add(const UChar*, int length);
+ static PassRefPtr<StringImpl> add(const UChar*);
+ static PassRefPtr<StringImpl> add(StringImpl*);
+#if USE(JSC)
+ static PassRefPtr<StringImpl> add(const JSC::UString&);
+ static PassRefPtr<StringImpl> add(const JSC::Identifier&);
+#endif
};
inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() == b.impl(); }
@@ -126,6 +134,7 @@ inline bool equalIgnoringCase(const char* a, const AtomicString& b) { return equ
inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); }
// Define external global variables for the commonly used atomic strings.
+// These are only usable from the main thread.
#ifndef ATOMICSTRING_HIDE_GLOBALS
extern const AtomicString nullAtom;
extern const AtomicString emptyAtom;
@@ -136,4 +145,15 @@ inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return e
} // namespace WebCore
+
+namespace WTF {
+
+ // AtomicStringHash is the default hash for AtomicString
+ template<typename T> struct DefaultHash;
+ template<> struct DefaultHash<WebCore::AtomicString> {
+ typedef WebCore::AtomicStringHash Hash;
+ };
+
+} // namespace WTF
+
#endif // AtomicString_h
diff --git a/WebCore/platform/text/AtomicStringHash.h b/WebCore/platform/text/AtomicStringHash.h
new file mode 100644
index 0000000..67a45de
--- /dev/null
+++ b/WebCore/platform/text/AtomicStringHash.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef AtomicStringHash_h
+#define AtomicStringHash_h
+
+#include "AtomicString.h"
+#include <wtf/HashTraits.h>
+
+namespace WebCore {
+
+ struct AtomicStringHash {
+ static unsigned hash(const AtomicString& key)
+ {
+ return key.impl()->existingHash();
+ }
+
+ static bool equal(const AtomicString& a, const AtomicString& b)
+ {
+ return a == b;
+ }
+
+ static const bool safeToCompareToEmptyOrDeleted = false;
+ };
+
+}
+
+namespace WTF {
+
+ // WebCore::AtomicStringHash is the default hash for AtomicString
+ template<> struct HashTraits<WebCore::AtomicString> : GenericHashTraits<WebCore::AtomicString> {
+ static const bool emptyValueIsZero = true;
+ static void constructDeletedValue(WebCore::AtomicString& slot) { new (&slot) WebCore::AtomicString(HashTableDeletedValue); }
+ static bool isDeletedValue(const WebCore::AtomicString& slot) { return slot.isHashTableDeletedValue(); }
+ };
+
+}
+
+#endif
diff --git a/WebCore/platform/text/BidiResolver.h b/WebCore/platform/text/BidiResolver.h
index d2515a9..ffd3d51 100644
--- a/WebCore/platform/text/BidiResolver.h
+++ b/WebCore/platform/text/BidiResolver.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2000 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2003, 2004, 2006, 2007 Apple Inc. All right reserved.
+ * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -23,7 +23,9 @@
#define BidiResolver_h
#include "BidiContext.h"
+#include <wtf/Noncopyable.h>
#include <wtf/PassRefPtr.h>
+#include <wtf/Vector.h>
namespace WebCore {
@@ -85,6 +87,8 @@ struct BidiCharacterRun {
}
}
+ void destroy() { delete this; }
+
int start() const { return m_start; }
int stop() const { return m_stop; }
unsigned char level() const { return m_level; }
@@ -100,19 +104,24 @@ struct BidiCharacterRun {
BidiCharacterRun* m_next;
};
-template <class Iterator, class Run> class BidiResolver {
+template <class Iterator, class Run> class BidiResolver : public Noncopyable {
public :
BidiResolver()
: m_direction(WTF::Unicode::OtherNeutral)
- , m_adjustEmbedding(false)
, reachedEndOfLine(false)
, emptyRun(true)
, m_firstRun(0)
, m_lastRun(0)
+ , m_logicallyLastRun(0)
, m_runCount(0)
{
}
+ const Iterator& position() const { return current; }
+ void setPosition(const Iterator& position) { current = position; }
+
+ void increment() { current.increment(); }
+
BidiContext* context() const { return m_status.context.get(); }
void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
@@ -126,22 +135,27 @@ public :
const BidiStatus& status() const { return m_status; }
void setStatus(const BidiStatus s) { m_status = s; }
- bool adjustEmbedding() const { return m_adjustEmbedding; }
- void setAdjustEmbedding(bool adjsutEmbedding) { m_adjustEmbedding = adjsutEmbedding; }
-
void embed(WTF::Unicode::Direction);
- void createBidiRunsForLine(const Iterator& start, const Iterator& end, bool visualOrder = false, bool hardLineBreak = false);
+ void commitExplicitEmbedding();
+
+ void createBidiRunsForLine(const Iterator& end, bool visualOrder = false, bool hardLineBreak = false);
Run* firstRun() const { return m_firstRun; }
Run* lastRun() const { return m_lastRun; }
- int runCount() const { return m_runCount; }
+ Run* logicallyLastRun() const { return m_logicallyLastRun; }
+ unsigned runCount() const { return m_runCount; }
void addRun(Run*);
+ void prependRun(Run*);
+
+ void moveRunToEnd(Run*);
+ void moveRunToBeginning(Run*);
+
void deleteRuns();
protected:
void appendRun();
- void reverseRuns(int start, int end);
+ void reverseRuns(unsigned start, unsigned end);
Iterator current;
Iterator sor;
@@ -149,7 +163,6 @@ protected:
Iterator last;
BidiStatus m_status;
WTF::Unicode::Direction m_direction;
- bool m_adjustEmbedding;
Iterator endOfLine;
bool reachedEndOfLine;
Iterator lastBeforeET;
@@ -157,25 +170,96 @@ protected:
Run* m_firstRun;
Run* m_lastRun;
- int m_runCount;
+ Run* m_logicallyLastRun;
+ unsigned m_runCount;
+
+private:
+ void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to);
+ void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from);
+
+ Vector<WTF::Unicode::Direction, 8> m_currentExplicitEmbeddingSequence;
};
template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::appendRun()
+inline void BidiResolver<Iterator, Run>::addRun(Run* run)
{
- if (emptyRun || eor.atEnd())
- return;
-
- Run* bidiRun = new Run(sor.offset(), eor.offset() + 1, context(), m_direction);
if (!m_firstRun)
- m_firstRun = bidiRun;
+ m_firstRun = run;
+ else
+ m_lastRun->m_next = run;
+ m_lastRun = run;
+ m_runCount++;
+}
+
+template <class Iterator, class Run>
+inline void BidiResolver<Iterator, Run>::prependRun(Run* run)
+{
+ ASSERT(!run->m_next);
+
+ if (!m_lastRun)
+ m_lastRun = run;
else
- m_lastRun->m_next = bidiRun;
- m_lastRun = bidiRun;
+ run->m_next = m_firstRun;
+ m_firstRun = run;
m_runCount++;
+}
+
+template <class Iterator, class Run>
+inline void BidiResolver<Iterator, Run>::moveRunToEnd(Run* run)
+{
+ ASSERT(m_firstRun);
+ ASSERT(m_lastRun);
+ ASSERT(run->m_next);
+
+ Run* current = 0;
+ Run* next = m_firstRun;
+ while (next != run) {
+ current = next;
+ next = current->next();
+ }
+
+ if (!current)
+ m_firstRun = run->next();
+ else
+ current->m_next = run->m_next;
+
+ run->m_next = 0;
+ m_lastRun->m_next = run;
+ m_lastRun = run;
+}
+
+template <class Iterator, class Run>
+inline void BidiResolver<Iterator, Run>::moveRunToBeginning(Run* run)
+{
+ ASSERT(m_firstRun);
+ ASSERT(m_lastRun);
+ ASSERT(run != m_firstRun);
+
+ Run* current = m_firstRun;
+ Run* next = current->next();
+ while (next != run) {
+ current = next;
+ next = current->next();
+ }
+
+ current->m_next = run->m_next;
+ if (run == m_lastRun)
+ m_lastRun = current;
+
+ run->m_next = m_firstRun;
+ m_firstRun = run;
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::appendRun()
+{
+ if (!emptyRun && !eor.atEnd()) {
+ addRun(new Run(sor.offset(), eor.offset() + 1, context(), m_direction));
+
+ eor.increment();
+ sor = eor;
+ }
- eor.increment(*this);
- sor = eor;
m_direction = WTF::Unicode::OtherNeutral;
m_status.eor = WTF::Unicode::OtherNeutral;
}
@@ -185,122 +269,144 @@ void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction d)
{
using namespace WTF::Unicode;
- bool b = m_adjustEmbedding;
- m_adjustEmbedding = false;
- if (d == PopDirectionalFormat) {
- BidiContext* c = context()->parent();
- if (c) {
- if (!emptyRun && eor != last) {
- ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
- // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
- ASSERT(m_status.last == EuropeanNumberSeparator
- || m_status.last == EuropeanNumberTerminator
- || m_status.last == CommonNumberSeparator
- || m_status.last == BoundaryNeutral
- || m_status.last == BlockSeparator
- || m_status.last == SegmentSeparator
- || m_status.last == WhiteSpaceNeutral
- || m_status.last == OtherNeutral);
- if (m_direction == OtherNeutral)
- m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
- if (context()->dir() == LeftToRight) {
- // bidi.sor ... bidi.eor ... bidi.last L
- if (m_status.eor == EuropeanNumber) {
- if (m_status.lastStrong != LeftToRight) {
- m_direction = EuropeanNumber;
- appendRun();
- }
- } else if (m_status.eor == ArabicNumber) {
- m_direction = ArabicNumber;
- appendRun();
- } else if (m_status.lastStrong != LeftToRight) {
- if (context()->dir() == RightToLeft)
- m_direction = RightToLeft;
- else {
- appendRun();
- m_direction = LeftToRight;
- }
- }
- } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
+ ASSERT(d == PopDirectionalFormat || d == LeftToRightEmbedding || d == LeftToRightOverride || d == RightToLeftEmbedding || d == RightToLeftOverride);
+ m_currentExplicitEmbeddingSequence.append(d);
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from)
+{
+ using namespace WTF::Unicode;
+
+ if (!emptyRun && eor != last) {
+ ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
+ // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
+ ASSERT(m_status.last == EuropeanNumberSeparator
+ || m_status.last == EuropeanNumberTerminator
+ || m_status.last == CommonNumberSeparator
+ || m_status.last == BoundaryNeutral
+ || m_status.last == BlockSeparator
+ || m_status.last == SegmentSeparator
+ || m_status.last == WhiteSpaceNeutral
+ || m_status.last == OtherNeutral);
+ if (m_direction == OtherNeutral)
+ m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
+ if (from == LeftToRight) {
+ // bidi.sor ... bidi.eor ... bidi.last L
+ if (m_status.eor == EuropeanNumber) {
+ if (m_status.lastStrong != LeftToRight) {
+ m_direction = EuropeanNumber;
appendRun();
- m_direction = RightToLeft;
}
- eor = last;
+ } else if (m_status.eor == ArabicNumber) {
+ m_direction = ArabicNumber;
+ appendRun();
+ } else if (m_status.lastStrong != LeftToRight) {
+ appendRun();
+ m_direction = LeftToRight;
}
+ } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
appendRun();
- emptyRun = true;
- // sor for the new run is determined by the higher level (rule X10)
- setLastDir(context()->dir());
- setLastStrongDir(context()->dir());
- setContext(c);
- eor = Iterator();
- }
- } else {
- Direction runDir;
- if (d == RightToLeftEmbedding || d == RightToLeftOverride)
- runDir = RightToLeft;
- else
- runDir = LeftToRight;
- bool override = d == LeftToRightOverride || d == RightToLeftOverride;
-
- unsigned char level = context()->level();
- if (runDir == RightToLeft) {
- if (level % 2) // we have an odd level
- level += 2;
- else
- level++;
- } else {
- if (level % 2) // we have an odd level
- level++;
- else
- level += 2;
+ m_direction = RightToLeft;
}
+ eor = last;
+ }
+ appendRun();
+ emptyRun = true;
+ // sor for the new run is determined by the higher level (rule X10)
+ setLastDir(from);
+ setLastStrongDir(from);
+ eor = Iterator();
+}
- if (level < 61) {
- if (!emptyRun && eor != last) {
- ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
- // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
- ASSERT(m_status.last == EuropeanNumberSeparator
- || m_status.last == EuropeanNumberTerminator
- || m_status.last == CommonNumberSeparator
- || m_status.last == BoundaryNeutral
- || m_status.last == BlockSeparator
- || m_status.last == SegmentSeparator
- || m_status.last == WhiteSpaceNeutral
- || m_status.last == OtherNeutral);
- if (m_direction == OtherNeutral)
- m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
- if (runDir == LeftToRight) {
- // bidi.sor ... bidi.eor ... bidi.last L
- if (m_status.eor == EuropeanNumber) {
- if (m_status.lastStrong != LeftToRight) {
- m_direction = EuropeanNumber;
- appendRun();
- }
- } else if (m_status.eor == ArabicNumber) {
- m_direction = ArabicNumber;
- appendRun();
- } else if (m_status.lastStrong != LeftToRight && context()->dir() == LeftToRight) {
- appendRun();
- m_direction = LeftToRight;
- }
- } else if (m_status.eor == ArabicNumber
- || m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || context()->dir() == RightToLeft)
- || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft) {
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to)
+{
+ using namespace WTF::Unicode;
+
+ if (!emptyRun && eor != last) {
+ ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
+ // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
+ ASSERT(m_status.last == EuropeanNumberSeparator
+ || m_status.last == EuropeanNumberTerminator
+ || m_status.last == CommonNumberSeparator
+ || m_status.last == BoundaryNeutral
+ || m_status.last == BlockSeparator
+ || m_status.last == SegmentSeparator
+ || m_status.last == WhiteSpaceNeutral
+ || m_status.last == OtherNeutral);
+ if (m_direction == OtherNeutral)
+ m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
+ if (to == LeftToRight) {
+ // bidi.sor ... bidi.eor ... bidi.last L
+ if (m_status.eor == EuropeanNumber) {
+ if (m_status.lastStrong != LeftToRight) {
+ m_direction = EuropeanNumber;
appendRun();
- m_direction = RightToLeft;
}
- eor = last;
+ } else if (m_status.eor == ArabicNumber) {
+ m_direction = ArabicNumber;
+ appendRun();
+ } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
+ appendRun();
+ m_direction = LeftToRight;
}
+ } else if (m_status.eor == ArabicNumber
+ || m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft)
+ || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft) {
appendRun();
- emptyRun = true;
- setContext(new BidiContext(level, runDir, override, context()));
- setLastDir(runDir);
- setLastStrongDir(runDir);
- eor = Iterator();
+ m_direction = RightToLeft;
+ }
+ eor = last;
+ }
+ appendRun();
+ emptyRun = true;
+ setLastDir(to);
+ setLastStrongDir(to);
+ eor = Iterator();
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::commitExplicitEmbedding()
+{
+ using namespace WTF::Unicode;
+
+ unsigned char fromLevel = context()->level();
+ RefPtr<BidiContext> toContext = context();
+
+ for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
+ Direction embedding = m_currentExplicitEmbeddingSequence[i];
+ if (embedding == PopDirectionalFormat) {
+ if (BidiContext* parentContext = toContext->parent())
+ toContext = parentContext;
+ } else {
+ Direction direction = (embedding == RightToLeftEmbedding || embedding == RightToLeftOverride) ? RightToLeft : LeftToRight;
+ bool override = embedding == LeftToRightOverride || embedding == RightToLeftOverride;
+ unsigned char level = toContext->level();
+ if (direction == RightToLeft) {
+ // Go to the least greater odd integer
+ level += 1;
+ level |= 1;
+ } else {
+ // Go to the least greater even integer
+ level += 2;
+ level &= ~1;
+ }
+ if (level < 61)
+ toContext = new BidiContext(level, direction, override, toContext.get());
}
}
- m_adjustEmbedding = b;
+
+ unsigned char toLevel = toContext->level();
+
+ if (toLevel > fromLevel)
+ raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
+ else if (toLevel < fromLevel)
+ lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight);
+
+ setContext(toContext);
+
+ m_currentExplicitEmbeddingSequence.clear();
}
template <class Iterator, class Run>
@@ -312,8 +418,8 @@ void BidiResolver<Iterator, Run>::deleteRuns()
Run* curr = m_firstRun;
while (curr) {
- Run* s = curr->m_next;
- delete curr;
+ Run* s = curr->next();
+ curr->destroy();
curr = s;
}
@@ -323,18 +429,18 @@ void BidiResolver<Iterator, Run>::deleteRuns()
}
template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::reverseRuns(int start, int end)
+void BidiResolver<Iterator, Run>::reverseRuns(unsigned start, unsigned end)
{
if (start >= end)
return;
- ASSERT(start >= 0 && end < m_runCount);
+ ASSERT(end < m_runCount);
// Get the item before the start of the runs to reverse and put it in
// |beforeStart|. |curr| should point to the first run to reverse.
Run* curr = m_firstRun;
Run* beforeStart = 0;
- int i = 0;
+ unsigned i = 0;
while (i < start) {
i++;
beforeStart = curr;
@@ -373,7 +479,7 @@ void BidiResolver<Iterator, Run>::reverseRuns(int start, int end)
}
template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, const Iterator& end, bool visualOrder, bool hardLineBreak)
+void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, bool visualOrder, bool hardLineBreak)
{
using namespace WTF::Unicode;
@@ -383,7 +489,6 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c
eor = Iterator();
- current = start;
last = current;
bool pastEnd = false;
BidiResolver<Iterator, Run> stateAtEnd;
@@ -426,6 +531,7 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c
case LeftToRightOverride:
case PopDirectionalFormat:
embed(dirCurrent);
+ commitExplicitEmbedding();
break;
// strong types
@@ -684,12 +790,11 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c
}
appendRun();
}
+ current = end;
m_status = stateAtEnd.m_status;
- current = stateAtEnd.current;
sor = stateAtEnd.sor;
eor = stateAtEnd.eor;
last = stateAtEnd.last;
- m_adjustEmbedding = stateAtEnd.m_adjustEmbedding;
reachedEndOfLine = stateAtEnd.reachedEndOfLine;
lastBeforeET = stateAtEnd.lastBeforeET;
emptyRun = stateAtEnd.emptyRun;
@@ -747,11 +852,10 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c
emptyRun = false;
}
- // this causes the operator ++ to open and close embedding levels as needed
- // for the CSS unicode-bidi property
- m_adjustEmbedding = true;
- current.increment(*this);
- m_adjustEmbedding = false;
+ increment();
+ if (!m_currentExplicitEmbeddingSequence.isEmpty())
+ commitExplicitEmbedding();
+
if (emptyRun && (dirCurrent == RightToLeftEmbedding
|| dirCurrent == LeftToRightEmbedding
|| dirCurrent == RightToLeftOverride
@@ -766,12 +870,20 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c
if (!pastEnd && (current == end || current.atEnd())) {
if (emptyRun)
break;
- stateAtEnd = *this;
+ stateAtEnd.m_status = m_status;
+ stateAtEnd.sor = sor;
+ stateAtEnd.eor = eor;
+ stateAtEnd.last = last;
+ stateAtEnd.reachedEndOfLine = reachedEndOfLine;
+ stateAtEnd.lastBeforeET = lastBeforeET;
+ stateAtEnd.emptyRun = emptyRun;
endOfLine = last;
pastEnd = true;
}
}
+ m_logicallyLastRun = m_lastRun;
+
// reorder line according to run structure...
// do not reverse for visually ordered web sites
if (!visualOrder) {
@@ -796,22 +908,22 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c
if (!(levelLow % 2))
levelLow++;
- int count = runCount() - 1;
+ unsigned count = runCount() - 1;
while (levelHigh >= levelLow) {
- int i = 0;
+ unsigned i = 0;
Run* currRun = firstRun();
while (i < count) {
while (i < count && currRun && currRun->m_level < levelHigh) {
i++;
currRun = currRun->next();
}
- int start = i;
+ unsigned start = i;
while (i <= count && currRun && currRun->m_level >= levelHigh) {
i++;
currRun = currRun->next();
}
- int end = i-1;
+ unsigned end = i - 1;
reverseRuns(start, end);
}
levelHigh--;
diff --git a/WebCore/platform/text/CString.cpp b/WebCore/platform/text/CString.cpp
index 4300b29..8e68628 100644
--- a/WebCore/platform/text/CString.cpp
+++ b/WebCore/platform/text/CString.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2003, 2006, 2008 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,7 +26,8 @@
#include "config.h"
#include "CString.h"
-#include "DeprecatedCString.h"
+
+using std::min;
namespace WebCore {
@@ -40,17 +41,12 @@ CString::CString(const char* str, unsigned length)
init(str, length);
}
-CString::CString(const DeprecatedCString& str)
-{
- init(str.data(), str.length());
-}
-
void CString::init(const char* str, unsigned length)
{
if (!str)
return;
- m_buffer = new CStringBuffer(length + 1);
+ m_buffer = CStringBuffer::create(length + 1);
memcpy(m_buffer->data(), str, length);
m_buffer->data()[length] = '\0';
}
@@ -72,16 +68,11 @@ unsigned CString::length() const
{
return m_buffer ? m_buffer->length() - 1 : 0;
}
-
-DeprecatedCString CString::deprecatedCString() const
-{
- return DeprecatedCString(data(), length() + 1);
-}
CString CString::newUninitialized(size_t length, char*& characterBuffer)
{
CString result;
- result.m_buffer = new CStringBuffer(length + 1);
+ result.m_buffer = CStringBuffer::create(length + 1);
char* bytes = result.m_buffer->data();
bytes[length] = '\0';
characterBuffer = bytes;
@@ -95,7 +86,7 @@ void CString::copyBufferIfNeeded()
int len = m_buffer->length();
RefPtr<CStringBuffer> m_temp = m_buffer;
- m_buffer = new CStringBuffer(len);
+ m_buffer = CStringBuffer::create(len);
memcpy(m_buffer->data(), m_temp->data(), len);
}
@@ -108,4 +99,17 @@ bool operator==(const CString& a, const CString& b)
return !strncmp(a.data(), b.data(), min(a.length(), b.length()));
}
+// Hands the character storage over as a SharedBuffer and resets m_buffer to 0,
+// leaving this CString null. Returns 0 when there is no buffer to release.
+PassRefPtr<SharedBuffer> CString::releaseBuffer()
+{
+    if (m_buffer) {
+        copyBufferIfNeeded();
+
+        RefPtr<SharedBuffer> detached = m_buffer->releaseBuffer();
+        m_buffer = 0;
+        return detached.release();
+    }
+
+    return 0;
+}
+
+
}
diff --git a/WebCore/platform/text/CString.h b/WebCore/platform/text/CString.h
index bd1e06c..09f112f 100644
--- a/WebCore/platform/text/CString.h
+++ b/WebCore/platform/text/CString.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2003, 2006, 2008 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,23 +26,26 @@
#ifndef CString_h
#define CString_h
+#include "SharedBuffer.h"
+
+#include <wtf/PassRefPtr.h>
#include <wtf/RefCounted.h>
#include <wtf/Vector.h>
-using std::min;
-
namespace WebCore {
- class DeprecatedCString;
-
class CStringBuffer : public RefCounted<CStringBuffer> {
public:
- CStringBuffer(unsigned length) : m_vector(length) { }
+ static PassRefPtr<CStringBuffer> create(unsigned length) { return adoptRef(new CStringBuffer(length)); }
char* data() { return m_vector.data(); }
- unsigned length() const { return m_vector.size(); }
+ size_t length() const { return m_vector.size(); }
+
+ PassRefPtr<SharedBuffer> releaseBuffer() { return SharedBuffer::adoptVector(m_vector); }
private:
+ CStringBuffer(unsigned length) : m_vector(length) { }
+
Vector<char> m_vector;
};
@@ -60,9 +63,8 @@ namespace WebCore {
unsigned length() const;
bool isNull() const { return !m_buffer; }
-
- CString(const DeprecatedCString&);
- DeprecatedCString deprecatedCString() const;
+
+ PassRefPtr<SharedBuffer> releaseBuffer();
private:
void copyBufferIfNeeded();
@@ -73,6 +75,6 @@ namespace WebCore {
bool operator==(const CString& a, const CString& b);
inline bool operator!=(const CString& a, const CString& b) { return !(a == b); }
-}
+} // namespace WebCore
#endif // CString_h
diff --git a/WebCore/platform/text/CharacterNames.h b/WebCore/platform/text/CharacterNames.h
index 5b52479..f589a6c 100644
--- a/WebCore/platform/text/CharacterNames.h
+++ b/WebCore/platform/text/CharacterNames.h
@@ -39,6 +39,8 @@ namespace WebCore {
const UChar bullet = 0x2022;
const UChar horizontalEllipsis = 0x2026;
const UChar ideographicSpace = 0x3000;
+ const UChar ideographicComma = 0x3001;
+ const UChar ideographicFullStop = 0x3002;
const UChar leftToRightMark = 0x200E;
const UChar leftToRightEmbed = 0x202A;
const UChar leftToRightOverride = 0x202D;
@@ -46,6 +48,7 @@ namespace WebCore {
const UChar noBreakSpace = 0x00A0;
const UChar objectReplacementCharacter = 0xFFFC;
const UChar popDirectionalFormatting = 0x202C;
+ const UChar replacementCharacter = 0xFFFD;
const UChar rightToLeftMark = 0x200F;
const UChar rightToLeftEmbed = 0x202B;
const UChar rightToLeftOverride = 0x202E;
diff --git a/WebCore/platform/text/ParserUtilities.h b/WebCore/platform/text/ParserUtilities.h
new file mode 100644
index 0000000..3105214
--- /dev/null
+++ b/WebCore/platform/text/ParserUtilities.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2003 The Karbon Developers
+ * Copyright (C) 2006, 2007 Rob Buis <buis@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef ParserUtilities_h
+#define ParserUtilities_h
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+    // Tries to consume the |length| code units at |name| from the front of
+    // [ptr, end). On a match |ptr| is advanced past them and true is returned;
+    // otherwise |ptr| is left unchanged and false is returned.
+    inline bool skipString(const UChar*& ptr, const UChar* end, const UChar* name, int length)
+    {
+        const bool matched = end - ptr >= length && !memcmp(name, ptr, sizeof(UChar) * length);
+        if (matched)
+            ptr += length;
+        return matched;
+    }
+
+    // Tries to consume the null-terminated 8-bit literal |str| from the front of
+    // [ptr, end). On a full match |ptr| is advanced past it and true is returned;
+    // otherwise |ptr| is left unchanged and false is returned.
+    inline bool skipString(const UChar*& ptr, const UChar* end, const char* str)
+    {
+        int length = static_cast<int>(strlen(str));
+        if (end - ptr < length)
+            return false;
+        for (int i = 0; i < length; ++i) {
+            // Compare through unsigned char: where plain char is signed, a byte
+            // >= 0x80 would otherwise be sign-extended and could never equal a UChar.
+            if (ptr[i] != static_cast<unsigned char>(str[i]))
+                return false;
+        }
+        ptr += length;
+        return true;
+    }
+
+} // namespace WebCore
+
+#endif // ParserUtilities_h
diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h
index f900513..73a44bd 100644
--- a/WebCore/platform/text/PlatformString.h
+++ b/WebCore/platform/text/PlatformString.h
@@ -27,23 +27,34 @@
#include "StringImpl.h"
-#if PLATFORM(CF)
+#include <wtf/PassRefPtr.h>
+
+#if USE(JSC)
+#include <kjs/identifier.h>
+#else
+// kjs/identifier.h includes HashMap.h. We explicitly include it in the case of
+// non-JSC builds to keep things consistent.
+#include <wtf/HashMap.h>
+#endif
+
+#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
typedef const struct __CFString * CFStringRef;
#endif
#if PLATFORM(QT)
+QT_BEGIN_NAMESPACE
class QString;
+QT_END_NAMESPACE
#endif
#if PLATFORM(WX)
class wxString;
#endif
-
namespace WebCore {
class CString;
-class DeprecatedString;
+class SharedBuffer;
struct StringHash;
class String {
@@ -51,19 +62,28 @@ public:
String() { } // gives null string, distinguishable from an empty string
String(const UChar*, unsigned length);
String(const UChar*); // Specifically for null terminated UTF-16
- String(const KJS::Identifier&);
- String(const KJS::UString&);
+#if USE(JSC)
+ String(const JSC::Identifier&);
+ String(const JSC::UString&);
+#endif
String(const char*);
String(const char*, unsigned length);
String(StringImpl* i) : m_impl(i) { }
String(PassRefPtr<StringImpl> i) : m_impl(i) { }
String(RefPtr<StringImpl> i) : m_impl(i) { }
+ void swap(String& o) { m_impl.swap(o.m_impl); }
+
+ // Hash table deleted values, which are only constructed and never copied or destroyed.
+ String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
+ bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
+
static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); }
static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); }
- operator KJS::Identifier() const;
- operator KJS::UString() const;
+#if USE(JSC)
+ operator JSC::UString() const;
+#endif
unsigned length() const;
const UChar* characters() const;
@@ -132,17 +152,24 @@ public:
static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
- Vector<String> split(const String& separator, bool allowEmptyEntries = false) const;
- Vector<String> split(UChar separator, bool allowEmptyEntries = false) const;
+ void split(const String& separator, Vector<String>& result) const;
+ void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;
+ void split(UChar separator, Vector<String>& result) const;
+ void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const;
+
+ int toIntStrict(bool* ok = 0, int base = 10) const;
+ unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
+ int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
+ uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
int toInt(bool* ok = 0) const;
+ unsigned toUInt(bool* ok = 0) const;
int64_t toInt64(bool* ok = 0) const;
uint64_t toUInt64(bool* ok = 0) const;
double toDouble(bool* ok = 0) const;
float toFloat(bool* ok = 0) const;
- Length* toLengthArray(int& len) const;
- Length* toCoordsArray(int& len) const;
- bool percentage(int &_percentage) const;
+
+ bool percentage(int& percentage) const;
// Makes a deep copy. Helpful only if you need to use a String on another thread.
// Since the underlying StringImpl objects are immutable, there's no other reason
@@ -154,7 +181,7 @@ public:
StringImpl* impl() const { return m_impl.get(); }
-#if PLATFORM(CF)
+#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
String(CFStringRef);
CFStringRef createCFString() const;
#endif
@@ -196,10 +223,7 @@ public:
// Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
WTF::Unicode::Direction defaultWritingDirection() const { return m_impl ? m_impl->defaultWritingDirection() : WTF::Unicode::LeftToRight; }
-
- String(const DeprecatedString&);
- DeprecatedString deprecatedString() const;
-
+
private:
RefPtr<StringImpl> m_impl;
};
@@ -222,13 +246,30 @@ inline bool equalIgnoringCase(const String& a, const String& b) { return equalIg
inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); }
inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); }
-bool operator==(const String& a, const DeprecatedString& b);
-inline bool operator==(const DeprecatedString& b, const String& a) { return a == b; }
-inline bool operator!=(const String& a, const DeprecatedString& b) { return !(a == b); }
-inline bool operator!=(const DeprecatedString& b, const String& a ) { return !(a == b); }
-
inline bool operator!(const String& str) { return str.isNull(); }
+inline void swap(String& a, String& b) { a.swap(b); }
+
+// String Operations
+
+bool charactersAreAllASCII(const UChar*, size_t);
+
+int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
+unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
+int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
+uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
+
+int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+
+double charactersToDouble(const UChar*, size_t, bool* ok = 0);
+float charactersToFloat(const UChar*, size_t, bool* ok = 0);
+
+int find(const UChar*, size_t, UChar, int startPosition = 0);
+int reverseFind(const UChar*, size_t, UChar, int startPosition = -1);
+
#ifdef __OBJC__
// This is for situations in WebKit where the long standing behavior has been
// "nil if empty", so we try to maintain longstanding behavior for the sake of
@@ -236,8 +277,73 @@ inline bool operator!(const String& str) { return str.isNull(); }
inline NSString* nsStringNilIfEmpty(const String& str) { return str.isEmpty() ? nil : (NSString*)str; }
#endif
+// Returns true when none of the |length| code units at |characters| has a bit
+// set above the low seven, i.e. all of them are in the range 0x00-0x7F.
+inline bool charactersAreAllASCII(const UChar* characters, size_t length)
+{
+    // OR every code unit together, then test the combined high bits once.
+    UChar combinedBits = 0;
+    const UChar* const end = characters + length;
+    for (const UChar* cursor = characters; cursor != end; ++cursor)
+        combinedBits |= *cursor;
+
+    const UChar nonASCIIMask = 0xFF80;
+    return !(combinedBits & nonASCIIMask);
}
+// Returns the index of the first occurrence of |character| in
+// characters[0, length) at or after |startPosition|, or -1 if there is none.
+inline int find(const UChar* characters, size_t length, UChar character, int startPosition)
+{
+    if (startPosition >= static_cast<int>(length))
+        return -1;
+
+    // A negative startPosition converts to a very large size_t here, so the
+    // scan below is skipped and -1 is returned.
+    size_t index = startPosition;
+    while (index < length) {
+        if (characters[index] == character)
+            return static_cast<int>(index);
+        ++index;
+    }
+    return -1;
+}
+
+// Searches backwards for |character| in characters[0, length), starting at
+// |startPosition|, and returns the index of the match or -1 if there is none.
+// A negative |startPosition| is taken relative to the end (-1 is the last
+// code unit). Positions that fall outside the buffer yield -1.
+inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition)
+{
+    if (startPosition >= static_cast<int>(length) || !length)
+        return -1;
+    if (startPosition < 0) {
+        startPosition += static_cast<int>(length);
+        // Still negative means the caller asked to start before the first code
+        // unit; bail out instead of indexing in front of the buffer.
+        if (startPosition < 0)
+            return -1;
+    }
+    for (int i = startPosition; i >= 0; --i) {
+        if (characters[i] == character)
+            return i;
+    }
+    return -1;
+}
+
+// Appends all code units of |string| to the end of |vector|.
+inline void append(Vector<UChar>& vector, const String& string)
+{
+    const UChar* source = string.characters();
+    unsigned sourceLength = string.length();
+    vector.append(source, sourceLength);
+}
+
+// Appends the decimal representation of |number| (one to three digits, no
+// padding) to the end of |vector|.
+inline void appendNumber(Vector<UChar>& vector, unsigned char number)
+{
+    int digitCount = number > 99 ? 3 : (number > 9 ? 2 : 1);
+    size_t firstDigitIndex = vector.size();
+    vector.grow(firstDigitIndex + digitCount);
+
+    // Fill the new slots from the least significant digit backwards.
+    for (int offset = digitCount - 1; offset >= 0; --offset) {
+        vector[firstDigitIndex + offset] = number % 10 + '0';
+        number /= 10;
+    }
+}
+
+
+
+PassRefPtr<SharedBuffer> utf8Buffer(const String&);
+
+} // namespace WebCore
+
namespace WTF {
// StringHash is the default hash for String
diff --git a/WebCore/platform/text/RegularExpression.cpp b/WebCore/platform/text/RegularExpression.cpp
index 0c26d33..1b933ff 100644
--- a/WebCore/platform/text/RegularExpression.cpp
+++ b/WebCore/platform/text/RegularExpression.cpp
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2004, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2008 Collabora Ltd.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,6 +27,7 @@
#include "config.h"
#include "RegularExpression.h"
+#include "PlatformString.h"
#include "Logging.h"
#include <wtf/RefCounted.h>
#include <pcre/pcre.h>
@@ -36,73 +38,47 @@ namespace WebCore {
const size_t maxSubstrings = 10;
const size_t maxOffsets = 3 * maxSubstrings;
-class RegularExpression::Private : public RefCounted<RegularExpression::Private>
-{
+class RegularExpression::Private : public RefCounted<Private> {
public:
- Private();
- Private(DeprecatedString pattern, bool caseSensitive, bool glob);
+ static PassRefPtr<Private> create() { return adoptRef(new Private); }
+ static PassRefPtr<Private> create(const String& pattern, bool caseSensitive) { return adoptRef(new Private(pattern, caseSensitive)); }
+
~Private();
- void compile(bool caseSensitive, bool glob);
+ void compile(bool caseSensitive);
- DeprecatedString pattern;
+ String pattern;
JSRegExp* regex;
- DeprecatedString lastMatchString;
+ String lastMatchString;
int lastMatchOffsets[maxOffsets];
int lastMatchCount;
int lastMatchPos;
int lastMatchLength;
+
+private:
+ Private();
+ Private(const String& pattern, bool caseSensitive);
};
-RegularExpression::Private::Private() : pattern("")
+RegularExpression::Private::Private()
+ : pattern("")
{
- compile(true, false);
+ compile(true);
}
-RegularExpression::Private::Private(DeprecatedString p, bool caseSensitive, bool glob) : pattern(p), lastMatchPos(-1), lastMatchLength(-1)
+RegularExpression::Private::Private(const String& p, bool caseSensitive)
+ : pattern(p)
+ , lastMatchPos(-1)
+ , lastMatchLength(-1)
{
- compile(caseSensitive, glob);
+ compile(caseSensitive);
}
-static DeprecatedString RegExpFromGlob(DeprecatedString glob)
+void RegularExpression::Private::compile(bool caseSensitive)
{
- DeprecatedString result = glob;
-
- // escape regexp metacharacters which are NOT glob metacharacters
-
- result.replace(RegularExpression("\\\\"), "\\\\");
- result.replace(RegularExpression("\\."), "\\.");
- result.replace(RegularExpression("\\+"), "\\+");
- result.replace(RegularExpression("\\$"), "\\$");
- // FIXME: incorrect for ^ inside bracket group
- result.replace(RegularExpression("\\^"), "\\^");
-
- // translate glob metacharacters into regexp metacharacters
- result.replace(RegularExpression("\\*"), ".*");
- result.replace(RegularExpression("\\?"), ".");
-
- // Require the glob to match the whole string
- result = "^" + result + "$";
-
- return result;
-}
-
-void RegularExpression::Private::compile(bool caseSensitive, bool glob)
-{
- DeprecatedString p;
-
- if (glob) {
- p = RegExpFromGlob(pattern);
- } else {
- p = pattern;
- }
- // Note we don't honor the Qt syntax for various character classes. If we convert
- // to a different underlying engine, we may need to change client code that relies
- // on the regex syntax (see FrameMac.mm for a couple examples).
-
const char* errorMessage;
- regex = jsRegExpCompile(reinterpret_cast<const UChar*>(p.unicode()), p.length(),
+ regex = jsRegExpCompile(pattern.characters(), pattern.length(),
caseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine,
0, &errorMessage);
if (!regex)
@@ -115,20 +91,24 @@ RegularExpression::Private::~Private()
}
-RegularExpression::RegularExpression() : d(new RegularExpression::Private())
+RegularExpression::RegularExpression()
+ : d(Private::create())
{
}
-RegularExpression::RegularExpression(const DeprecatedString &pattern, bool caseSensitive, bool glob) : d(new RegularExpression::Private(pattern, caseSensitive, glob))
+RegularExpression::RegularExpression(const String& pattern, bool caseSensitive)
+ : d(Private::create(pattern, caseSensitive))
{
}
-RegularExpression::RegularExpression(const char *cpattern) : d(new RegularExpression::Private(cpattern, true, false))
+RegularExpression::RegularExpression(const char* pattern)
+ : d(Private::create(pattern, true))
{
}
-RegularExpression::RegularExpression(const RegularExpression &re) : d (re.d)
+RegularExpression::RegularExpression(const RegularExpression& re)
+ : d(re.d)
{
}
@@ -136,57 +116,54 @@ RegularExpression::~RegularExpression()
{
}
-RegularExpression &RegularExpression::operator=(const RegularExpression &re)
+RegularExpression& RegularExpression::operator=(const RegularExpression& re)
{
RegularExpression tmp(re);
- RefPtr<RegularExpression::Private> tmpD = tmp.d;
-
- tmp.d = d;
- d = tmpD;
-
+ tmp.d.swap(d);
return *this;
}
-DeprecatedString RegularExpression::pattern() const
+String RegularExpression::pattern() const
{
return d->pattern;
}
-int RegularExpression::match(const DeprecatedString &str, int startFrom, int *matchLength) const
+int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
{
+ if (str.isNull())
+ return -1;
+
d->lastMatchString = str;
// First 2 offsets are start and end offsets; 3rd entry is used internally by pcre
- d->lastMatchCount = jsRegExpExecute(d->regex, reinterpret_cast<const UChar*>(d->lastMatchString.unicode()), d->lastMatchString.length(), startFrom, d->lastMatchOffsets, maxOffsets);
+ d->lastMatchCount = jsRegExpExecute(d->regex, d->lastMatchString.characters(),
+ d->lastMatchString.length(), startFrom, d->lastMatchOffsets, maxOffsets);
if (d->lastMatchCount < 0) {
if (d->lastMatchCount != JSRegExpErrorNoMatch)
LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", d->lastMatchCount);
d->lastMatchPos = -1;
d->lastMatchLength = -1;
- d->lastMatchString = DeprecatedString();
+ d->lastMatchString = String();
return -1;
}
// 1 means 1 match; 0 means more than one match. First match is recorded in offsets.
- //ASSERT(d->lastMatchCount < 2);
d->lastMatchPos = d->lastMatchOffsets[0];
d->lastMatchLength = d->lastMatchOffsets[1] - d->lastMatchOffsets[0];
- if (matchLength != NULL) {
+ if (matchLength)
*matchLength = d->lastMatchLength;
- }
return d->lastMatchPos;
}
-int RegularExpression::search(const DeprecatedString &str, int startFrom) const
+// Runs match() on |str| beginning at |startFrom| and returns the position it
+// reports, discarding the match length.
+int RegularExpression::search(const String& str, int startFrom) const
{
- if (startFrom < 0) {
- startFrom = str.length() - startFrom;
- }
- return match(str, startFrom, NULL);
+ // NOTE(review): a negative startFrom is presumably an offset from the end of
+ // |str| (-1 being the last character) - confirm against callers. It is
+ // added to the length; subtracting it would land past the end.
+ if (startFrom < 0)
+ startFrom = str.length() + startFrom;
+ return match(str, startFrom, 0);
}
-int RegularExpression::searchRev(const DeprecatedString &str) const
+int RegularExpression::searchRev(const String& str) const
{
- // FIXME: Total hack for now. Search forward, return the last, greedy match
+ // FIXME: Total hack for now. Search forward, return the last, greedy match
int start = 0;
int pos;
int lastPos = -1;
@@ -195,7 +172,7 @@ int RegularExpression::searchRev(const DeprecatedString &str) const
int matchLength;
pos = match(str, start, &matchLength);
if (pos >= 0) {
- if ((pos+matchLength) > (lastPos+lastMatchLength)) {
+ if (pos + matchLength > lastPos + lastMatchLength) {
// replace last match if this one is later and not a subset of the last match
lastPos = pos;
lastMatchLength = matchLength;
@@ -219,4 +196,19 @@ int RegularExpression::matchedLength() const
return d->lastMatchLength;
}
+// Replaces successive matches of |target| in |string| with |replacement|,
+// resuming the search just after each inserted replacement. Stops at the first
+// failed match, at the end of the string, or after a zero-length match.
+void replace(String& string, const RegularExpression& target, const String& replacement)
+{
+    for (int searchStart = 0; searchStart < static_cast<int>(string.length()); ) {
+        int matchLength;
+        int matchStart = target.match(string, searchStart, &matchLength);
+        if (matchStart < 0)
+            return;
+        string.replace(matchStart, matchLength, replacement);
+        if (!matchLength)
+            return; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
+        searchStart = matchStart + replacement.length();
+    }
}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/RegularExpression.h b/WebCore/platform/text/RegularExpression.h
index ec1cdef..5d1991e 100644
--- a/WebCore/platform/text/RegularExpression.h
+++ b/WebCore/platform/text/RegularExpression.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2003, 2008 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,34 +26,38 @@
#ifndef RegularExpression_h
#define RegularExpression_h
-#include "DeprecatedString.h"
+#include <wtf/RefPtr.h>
namespace WebCore {
+class String;
+
class RegularExpression {
public:
RegularExpression();
- RegularExpression(const DeprecatedString &, bool caseSensitive = false, bool glob = false);
- RegularExpression(const char *);
+ RegularExpression(const String&, bool caseSensitive = false);
+ RegularExpression(const char*);
~RegularExpression();
- RegularExpression(const RegularExpression &);
- RegularExpression &operator=(const RegularExpression &);
+ RegularExpression(const RegularExpression&);
+ RegularExpression& operator=(const RegularExpression&);
- DeprecatedString pattern() const;
- int match(const DeprecatedString &, int startFrom = 0, int *matchLength = 0) const;
+ String pattern() const;
+ int match(const String&, int startFrom = 0, int* matchLength = 0) const;
- int search(const DeprecatedString &, int startFrom = 0) const;
- int searchRev(const DeprecatedString &) const;
+ int search(const String&, int startFrom = 0) const;
+ int searchRev(const String&) const;
int pos(int n = 0);
int matchedLength() const;
-
+
private:
class Private;
RefPtr<Private> d;
};
-}
+void replace(String&, const RegularExpression&, const String&);
+
+} // namespace WebCore
-#endif
+#endif // RegularExpression_h
diff --git a/WebCore/platform/text/SegmentedString.cpp b/WebCore/platform/text/SegmentedString.cpp
index 0b3c7e9..9f5eb26 100644
--- a/WebCore/platform/text/SegmentedString.cpp
+++ b/WebCore/platform/text/SegmentedString.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
+ Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
@@ -59,10 +59,10 @@ unsigned SegmentedString::length() const
++length;
}
if (m_composite) {
- DeprecatedValueListConstIterator<SegmentedSubstring> i = m_substrings.begin();
- DeprecatedValueListConstIterator<SegmentedSubstring> e = m_substrings.end();
- for (; i != e; ++i)
- length += (*i).m_length;
+ Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin();
+ Deque<SegmentedSubstring>::const_iterator e = m_substrings.end();
+ for (; it != e; ++it)
+ length += it->m_length;
}
return length;
}
@@ -70,10 +70,10 @@ unsigned SegmentedString::length() const
void SegmentedString::setExcludeLineNumbers()
{
if (m_composite) {
- DeprecatedValueListIterator<SegmentedSubstring> i = m_substrings.begin();
- DeprecatedValueListIterator<SegmentedSubstring> e = m_substrings.end();
- for (; i != e; ++i)
- (*i).setExcludeLineNumbers();
+ Deque<SegmentedSubstring>::iterator it = m_substrings.begin();
+ Deque<SegmentedSubstring>::iterator e = m_substrings.end();
+ for (; it != e; ++it)
+ it->setExcludeLineNumbers();
} else
m_currentString.setExcludeLineNumbers();
}
@@ -120,10 +120,10 @@ void SegmentedString::append(const SegmentedString &s)
ASSERT(!s.escaped());
append(s.m_currentString);
if (s.m_composite) {
- DeprecatedValueListConstIterator<SegmentedSubstring> i = s.m_substrings.begin();
- DeprecatedValueListConstIterator<SegmentedSubstring> e = s.m_substrings.end();
- for (; i != e; ++i)
- append(*i);
+ Deque<SegmentedSubstring>::const_iterator it = s.m_substrings.begin();
+ Deque<SegmentedSubstring>::const_iterator e = s.m_substrings.end();
+ for (; it != e; ++it)
+ append(*it);
}
m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
}
@@ -133,10 +133,10 @@ void SegmentedString::prepend(const SegmentedString &s)
ASSERT(!escaped());
ASSERT(!s.escaped());
if (s.m_composite) {
- DeprecatedValueListConstIterator<SegmentedSubstring> i = s.m_substrings.fromLast();
- DeprecatedValueListConstIterator<SegmentedSubstring> e = s.m_substrings.end();
- for (; i != e; --i)
- prepend(*i);
+ Deque<SegmentedSubstring>::const_reverse_iterator it = s.m_substrings.rbegin();
+ Deque<SegmentedSubstring>::const_reverse_iterator e = s.m_substrings.rend();
+ for (; it != e; ++it)
+ prepend(*it);
}
prepend(s.m_currentString);
m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
@@ -146,7 +146,7 @@ void SegmentedString::advanceSubstring()
{
if (m_composite) {
m_currentString = m_substrings.first();
- m_substrings.remove(m_substrings.begin());
+ m_substrings.removeFirst();
if (m_substrings.isEmpty())
m_composite = false;
} else {
@@ -164,10 +164,10 @@ String SegmentedString::toString() const
}
m_currentString.appendTo(result);
if (m_composite) {
- DeprecatedValueListConstIterator<SegmentedSubstring> i = m_substrings.begin();
- DeprecatedValueListConstIterator<SegmentedSubstring> e = m_substrings.end();
- for (; i != e; ++i)
- (*i).appendTo(result);
+ Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin();
+ Deque<SegmentedSubstring>::const_iterator e = m_substrings.end();
+ for (; it != e; ++it)
+ it->appendTo(result);
}
return result;
}
diff --git a/WebCore/platform/text/SegmentedString.h b/WebCore/platform/text/SegmentedString.h
index 52178d3..79ed1f0 100644
--- a/WebCore/platform/text/SegmentedString.h
+++ b/WebCore/platform/text/SegmentedString.h
@@ -20,20 +20,22 @@
#ifndef SegmentedString_h
#define SegmentedString_h
-#include "DeprecatedValueList.h"
#include "PlatformString.h"
+#include <wtf/Deque.h>
namespace WebCore {
class SegmentedString;
class SegmentedSubstring {
-private:
- friend class SegmentedString;
-
+public:
SegmentedSubstring() : m_length(0), m_current(0), m_doNotExcludeLineNumbers(true) {}
- SegmentedSubstring(const String& str) : m_string(str), m_length(str.length()), m_doNotExcludeLineNumbers(true) {
- m_current = m_length == 0 ? 0 : m_string.characters();
+ SegmentedSubstring(const String& str)
+ : m_length(str.length())
+ , m_current(str.isEmpty() ? 0 : str.characters())
+ , m_string(str)
+ , m_doNotExcludeLineNumbers(true)
+ {
}
SegmentedSubstring(const UChar* str, int length) : m_length(length), m_current(length == 0 ? 0 : str), m_doNotExcludeLineNumbers(true) {}
@@ -45,7 +47,8 @@ private:
void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
- void appendTo(String& str) const {
+ void appendTo(String& str) const
+ {
if (m_string.characters() == m_current) {
if (str.isEmpty())
str = m_string;
@@ -56,9 +59,12 @@ private:
}
}
- String m_string;
+public:
int m_length;
const UChar* m_current;
+
+private:
+ String m_string;
bool m_doNotExcludeLineNumbers;
};
@@ -77,8 +83,8 @@ public:
void clear();
- void append(const SegmentedString &);
- void prepend(const SegmentedString &);
+ void append(const SegmentedString&);
+ void prepend(const SegmentedString&);
bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
void setExcludeLineNumbers();
@@ -149,8 +155,8 @@ public:
const UChar* operator->() const { return current(); }
private:
- void append(const SegmentedSubstring &);
- void prepend(const SegmentedSubstring &);
+ void append(const SegmentedSubstring&);
+ void prepend(const SegmentedSubstring&);
void advanceSlowCase();
void advanceSlowCase(int& lineNumber);
@@ -161,7 +167,7 @@ private:
UChar m_pushedChar2;
SegmentedSubstring m_currentString;
const UChar* m_currentChar;
- DeprecatedValueList<SegmentedSubstring> m_substrings;
+ Deque<SegmentedSubstring> m_substrings;
bool m_composite;
};
diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp
index 967e7c8..44500e1 100644
--- a/WebCore/platform/text/String.cpp
+++ b/WebCore/platform/text/String.cpp
@@ -22,16 +22,25 @@
#include "PlatformString.h"
#include "CString.h"
-#include "DeprecatedString.h"
+#include "FloatConversion.h"
#include "StringBuffer.h"
#include "TextEncoding.h"
-#include <kjs/identifier.h>
+#include <kjs/dtoa.h>
+#include <limits>
+#include <stdarg.h>
+#include <wtf/ASCIICType.h>
#include <wtf/StringExtras.h>
#include <wtf/Vector.h>
-#include <stdarg.h>
+#include <wtf/unicode/Unicode.h>
+#include <wtf/unicode/UTF8.h>
+
+#if USE(JSC)
+using JSC::Identifier;
+using JSC::UString;
+#endif
-using KJS::Identifier;
-using KJS::UString;
+using namespace WTF;
+using namespace WTF::Unicode;
namespace WebCore {
@@ -54,13 +63,6 @@ String::String(const UChar* str)
m_impl = StringImpl::create(str, len);
}
-String::String(const DeprecatedString& str)
-{
- if (str.isNull())
- return;
- m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.unicode()), str.length());
-}
-
String::String(const char* str)
{
if (!str)
@@ -289,7 +291,7 @@ bool String::percentage(int& result) const
if ((*m_impl)[m_impl->length() - 1] != '%')
return false;
- result = DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_impl->characters()), m_impl->length() - 1).string().toInt();
+ result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);
return true;
}
@@ -310,17 +312,21 @@ const UChar* String::charactersWithNullTermination()
return m_impl->characters();
}
-DeprecatedString String::deprecatedString() const
-{
- if (!m_impl)
- return DeprecatedString::null;
- if (!m_impl->characters())
- return DeprecatedString("", 0);
- return DeprecatedString(reinterpret_cast<const DeprecatedChar*>(m_impl->characters()), m_impl->length());
-}
-
String String::format(const char *format, ...)
{
+#if PLATFORM(QT)
+ // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
+ // https://bugs.webkit.org/show_bug.cgi?id=18994
+ va_list args;
+ va_start(args, format);
+
+ QString buffer;
+ buffer.vsprintf(format, args);
+
+ va_end(args);
+
+ return buffer;
+#else
va_list args;
va_start(args, format);
@@ -355,6 +361,7 @@ String String::format(const char *format, ...)
va_end(args);
return StringImpl::create(buffer.data(), len);
+#endif
}
String String::number(int n)
@@ -400,6 +407,46 @@ String String::number(double n)
return String::format("%.6lg", n);
}
+// Converts the string to an int in the given base by delegating to
+// StringImpl::toIntStrict. A null string reports failure through |ok| (when
+// one is supplied) and yields 0.
+int String::toIntStrict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toIntStrict(ok, base);
+
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Converts the string to an unsigned in the given base by delegating to
+// StringImpl::toUIntStrict. A null string reports failure through |ok| (when
+// one is supplied) and yields 0.
+unsigned String::toUIntStrict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toUIntStrict(ok, base);
+
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Converts the string to an int64_t in the given base by delegating to
+// StringImpl::toInt64Strict. A null string reports failure through |ok| (when
+// one is supplied) and yields 0.
+int64_t String::toInt64Strict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toInt64Strict(ok, base);
+
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Converts the string to a uint64_t in the given base by delegating to
+// StringImpl::toUInt64Strict. A null string reports failure through |ok| (when
+// one is supplied) and yields 0.
+uint64_t String::toUInt64Strict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toUInt64Strict(ok, base);
+
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
int String::toInt(bool* ok) const
{
if (!m_impl) {
@@ -410,6 +457,16 @@ int String::toInt(bool* ok) const
return m_impl->toInt(ok);
}
+// Converts the string to an unsigned by delegating to StringImpl::toUInt.
+// A null string reports failure through |ok| (when one is supplied) and
+// yields 0.
+unsigned String::toUInt(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toUInt(ok);
+
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
int64_t String::toInt64(bool* ok) const
{
if (!m_impl) {
@@ -435,7 +492,7 @@ double String::toDouble(bool* ok) const
if (!m_impl) {
if (ok)
*ok = false;
- return 0;
+ return 0.0;
}
return m_impl->toDouble(ok);
}
@@ -462,38 +519,44 @@ bool String::isEmpty() const
return !m_impl || !m_impl->length();
}
-Length* String::toCoordsArray(int& len) const
+void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
{
- return m_impl ? m_impl->toCoordsArray(len) : 0;
+ result.clear();
+
+ int startPos = 0;
+ int endPos;
+ while ((endPos = find(separator, startPos)) != -1) {
+ if (allowEmptyEntries || startPos != endPos)
+ result.append(substring(startPos, endPos - startPos));
+ startPos = endPos + separator.length();
+ }
+ if (allowEmptyEntries || startPos != static_cast<int>(length()))
+ result.append(substring(startPos));
}
-Length* String::toLengthArray(int& len) const
+void String::split(const String& separator, Vector<String>& result) const
{
- return m_impl ? m_impl->toLengthArray(len) : 0;
+ return split(separator, false, result);
}
-Vector<String> String::split(const String& separator, bool allowEmptyEntries) const
+void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
{
- Vector<String> result;
-
+ result.clear();
+
int startPos = 0;
int endPos;
while ((endPos = find(separator, startPos)) != -1) {
if (allowEmptyEntries || startPos != endPos)
result.append(substring(startPos, endPos - startPos));
- startPos = endPos + separator.length();
+ startPos = endPos + 1;
}
- if (allowEmptyEntries || startPos != (int)length())
+ if (allowEmptyEntries || startPos != static_cast<int>(length()))
result.append(substring(startPos));
-
- return result;
}
-Vector<String> String::split(UChar separator, bool allowEmptyEntries) const
+// Splits the string on a single separator character, discarding empty
+// entries, and stores the pieces in |result| (which is cleared first by the
+// overload this forwards to).
+void String::split(UChar separator, Vector<String>& result) const
{
- Vector<String> result;
-
- return split(String(&separator, 1), allowEmptyEntries);
+    // Forward straight to the UChar overload rather than allocating a
+    // temporary one-character String just to search for it.
+    split(separator, false, result);
}
#ifndef NDEBUG
@@ -514,61 +577,247 @@ Vector<char> String::ascii() const
CString String::latin1() const
{
- return Latin1Encoding().encode(characters(), length());
+ return Latin1Encoding().encode(characters(), length(), QuestionMarksForUnencodables);
}
CString String::utf8() const
{
- return UTF8Encoding().encode(characters(), length());
+ return UTF8Encoding().encode(characters(), length(), QuestionMarksForUnencodables);
}
String String::fromUTF8(const char* string, size_t size)
{
+ if (!string)
+ return String();
return UTF8Encoding().decode(string, size);
}
String String::fromUTF8(const char* string)
{
+ if (!string)
+ return String();
return UTF8Encoding().decode(string, strlen(string));
}
-
-bool operator==(const String& a, const DeprecatedString& b)
-{
- unsigned l = a.length();
- if (l != b.length())
- return false;
- if (!memcmp(a.characters(), b.unicode(), l * sizeof(UChar)))
- return true;
- return false;
-}
-
+#if USE(JSC)
String::String(const Identifier& str)
{
if (str.isNull())
return;
- m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.data()), str.size());
+ m_impl = StringImpl::create(str.data(), str.size());
}
String::String(const UString& str)
{
if (str.isNull())
return;
- m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.data()), str.size());
+ m_impl = StringImpl::create(str.data(), str.size());
}
-String::operator Identifier() const
+String::operator UString() const
{
if (!m_impl)
- return Identifier();
- return Identifier(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length());
+ return UString();
+ return UString(m_impl->characters(), m_impl->length());
}
+#endif
-String::operator UString() const
+// String Operations
+
+// Returns true when |c| is an ASCII digit or ASCII letter that is a valid
+// digit in |base|. Letters are accepted in either case, and for the letter
+// range any base above 36 is treated as 36.
+static bool isCharacterAllowedInBase(UChar c, int base)
{
- if (!m_impl)
- return UString();
- return UString(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length());
+ // Anything outside the 7-bit ASCII range can never be a digit.
+ if (c > 0x7F)
+ return false;
+ if (isASCIIDigit(c))
+ return c - '0' < base;
+ if (isASCIIAlpha(c)) {
+ if (base > 36)
+ base = 36;
+ // 'a'/'A' stand for 10, so a base of 10 or less admits no letters.
+ return (c >= 'a' && c < 'a' + base - 10)
+ || (c >= 'A' && c < 'A' + base - 10);
+ }
+ return false;
+}
+
+// Parses |length| UTF-16 code units at |data| as an integer of IntegralType
+// in the given |base|. Accepted layout: optional leading whitespace, an
+// optional sign ('-' only for signed types, '+' for any type), one or more
+// digits valid in |base|, then optional trailing whitespace.
+//
+// If |ok| is non-null it is set to true only when at least one digit was
+// read, no overflow was detected, and nothing but whitespace followed the
+// digits; otherwise it is set to false. Returns the parsed value on success
+// and 0 on any failure (including a null |data|).
+template <typename IntegralType>
+static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base)
+{
+ static const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
+ static const bool isSigned = std::numeric_limits<IntegralType>::is_signed;
+ // Largest accumulated value that can still be multiplied by |base|.
+ // NOTE(review): |base| is not validated here; a base of 0 would divide by zero.
+ const IntegralType maxMultiplier = integralMax / base;
+
+ IntegralType value = 0;
+ bool isOk = false;
+ bool isNegative = false;
+
+ if (!data)
+ goto bye;
+
+ // skip leading whitespace
+ while (length && isSpaceOrNewline(*data)) {
+ length--;
+ data++;
+ }
+
+ // A leading '-' is only consumed for signed types; for unsigned types it
+ // is left in place and rejected by the digit check below.
+ if (isSigned && length && *data == '-') {
+ length--;
+ data++;
+ isNegative = true;
+ } else if (length && *data == '+') {
+ length--;
+ data++;
+ }
+
+ // Require at least one valid digit.
+ if (!length || !isCharacterAllowedInBase(*data, base))
+ goto bye;
+
+ while (length && isCharacterAllowedInBase(*data, base)) {
+ length--;
+ IntegralType digitValue;
+ UChar c = *data;
+ if (isASCIIDigit(c))
+ digitValue = c - '0';
+ else if (c >= 'a')
+ digitValue = c - 'a' + 10;
+ else
+ digitValue = c - 'A' + 10;
+
+ // Overflow check before accumulating. A negative number is allowed one
+ // extra unit of magnitude via the "+ isNegative" term.
+ // NOTE(review): for a signed IntegralType that allowance lets |value|
+ // reach integralMax + 1 in the assignment below before it is negated,
+ // which overflows the signed type -- confirm this is acceptable.
+ if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
+ goto bye;
+
+ value = base * value + digitValue;
+ data++;
+ }
+
+ // Presumably silences MSVC's warning about unary minus on an unsigned
+ // type, since the negation below is also compiled for unsigned
+ // instantiations -- TODO confirm warning number 4146.
+#if COMPILER(MSVC)
+#pragma warning(push, 0)
+#pragma warning(disable:4146)
+#endif
+
+ if (isNegative)
+ value = -value;
+
+#if COMPILER(MSVC)
+#pragma warning(pop)
+#endif
+
+ // skip trailing space
+ while (length && isSpaceOrNewline(*data)) {
+ length--;
+ data++;
+ }
+
+ // Success only if the entire input has been consumed.
+ if (!length)
+ isOk = true;
+bye:
+ if (ok)
+ *ok = isOk;
+ return isOk ? value : 0;
+}
+
+// Measures the longest prefix of |data| that looks like a decimal integer:
+// optional leading whitespace, at most one sign character, then a run of
+// ASCII digits. Returns the number of code units in that prefix.
+static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
+{
+    size_t pos = 0;
+
+    // Leading whitespace is permitted.
+    while (pos != length && isSpaceOrNewline(data[pos]))
+        ++pos;
+
+    // A single '+' or '-' may follow.
+    if (pos != length) {
+        const UChar candidate = data[pos];
+        if (candidate == '+' || candidate == '-')
+            ++pos;
+    }
+
+    // Then consume the run of ASCII digits.
+    while (pos != length && isASCIIDigit(data[pos]))
+        ++pos;
+
+    return pos;
+}
+
+// Strict conversion of |length| UTF-16 code units to int in |base|. Forwards
+// to toIntegralType<int>, which sets |ok| (if non-null) and returns 0 when
+// the input is not entirely a valid number.
+int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<int>(data, length, ok, base);
+}
+
+// Strict conversion of |length| UTF-16 code units to unsigned in |base|.
+// Forwards to toIntegralType<unsigned>, which sets |ok| (if non-null) and
+// returns 0 when the input is not entirely a valid number.
+unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<unsigned>(data, length, ok, base);
+}
+
+// Strict conversion of |length| UTF-16 code units to int64_t in |base|.
+// Forwards to toIntegralType<int64_t>, which sets |ok| (if non-null) and
+// returns 0 when the input is not entirely a valid number.
+int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<int64_t>(data, length, ok, base);
+}
+
+// Strict conversion of |length| UTF-16 code units to uint64_t in |base|.
+// Forwards to toIntegralType<uint64_t>, which sets |ok| (if non-null) and
+// returns 0 when the input is not entirely a valid number.
+uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<uint64_t>(data, length, ok, base);
+}
+
+// Lenient base-10 conversion to int. Only the leading integer-looking prefix
+// measured by lengthOfCharactersAsInteger is handed to toIntegralType<int>,
+// so text following the digits does not by itself cause a failure.
+int charactersToInt(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+// Lenient base-10 conversion to unsigned. Only the leading integer-looking
+// prefix measured by lengthOfCharactersAsInteger is handed to
+// toIntegralType<unsigned>, so text following the digits does not by itself
+// cause a failure.
+unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+// Lenient base-10 conversion to int64_t. Only the leading integer-looking
+// prefix measured by lengthOfCharactersAsInteger is handed to
+// toIntegralType<int64_t>, so text following the digits does not by itself
+// cause a failure.
+int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+// Lenient base-10 conversion to uint64_t. Only the leading integer-looking
+// prefix measured by lengthOfCharactersAsInteger is handed to
+// toIntegralType<uint64_t>, so text following the digits does not by itself
+// cause a failure.
+uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+// Parses |length| UTF-16 code units as a double using JSC::strtod on an
+// 8-bit copy of the text. An empty input fails (|ok| false, result 0.0).
+// Otherwise |ok|, when supplied, reports whether strtod stopped at the
+// terminating NUL.
+double charactersToDouble(const UChar* data, size_t length, bool* ok)
+{
+    if (length) {
+        // Build a NUL-terminated narrow copy; code units of 0x7F and above
+        // are replaced with '?'.
+        Vector<char, 256> narrowed(length + 1);
+        for (unsigned index = 0; index < length; ++index) {
+            const UChar unit = data[index];
+            if (unit < 0x7F)
+                narrowed[index] = unit;
+            else
+                narrowed[index] = '?';
+        }
+        narrowed[length] = '\0';
+
+        char* parseEnd;
+        const double parsed = JSC::strtod(narrowed.data(), &parseEnd);
+        if (ok)
+            *ok = !parseEnd || !*parseEnd;
+        return parsed;
+    }
+
+    if (ok)
+        *ok = false;
+    return 0.0;
+}
+
+// Converts to float by parsing the text as a double with charactersToDouble
+// and passing the result through narrowPrecisionToFloat. |ok| is whatever
+// charactersToDouble reported.
+float charactersToFloat(const UChar* data, size_t length, bool* ok)
+{
+ // FIXME: This will return ok even when the string fits into a double but not a float.
+ return narrowPrecisionToFloat(charactersToDouble(data, length, ok));
+}
+
+// Encodes |string| as UTF-8 and returns the bytes in a SharedBuffer. Returns 0
+// when convertUTF16ToUTF8 reports anything other than conversionOK.
+PassRefPtr<SharedBuffer> utf8Buffer(const String& string)
+{
+ // Allocate a buffer big enough to hold all the characters.
+ // Three bytes are reserved per UTF-16 code unit.
+ const int length = string.length();
+ Vector<char> buffer(length * 3);
+
+ // Convert to runs of 8-bit characters.
+ // NOTE(review): the final `true` argument presumably selects strict
+ // conversion -- confirm against the convertUTF16ToUTF8 declaration.
+ char* p = buffer.data();
+ const UChar* d = string.characters();
+ ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), true);
+ if (result != conversionOK)
+ return 0;
+
+ // |p| was advanced past the last byte written; trim the unused tail.
+ buffer.shrink(p - buffer.data());
+ return SharedBuffer::adoptVector(buffer);
}
} // namespace WebCore
diff --git a/WebCore/platform/text/StringBuilder.cpp b/WebCore/platform/text/StringBuilder.cpp
new file mode 100644
index 0000000..0e9555c
--- /dev/null
+++ b/WebCore/platform/text/StringBuilder.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "StringBuilder.h"
+
+#include "StringBuffer.h"
+
+namespace WebCore {
+
+// Appends |string| to the builder. A null string is ignored entirely. An
+// empty (non-null) string stores no segment but still moves the builder out
+// of its null state.
+void StringBuilder::append(const String& string)
+{
+    if (string.isNull())
+        return;
+
+    // UINT_MAX in m_totalLength marks the null state, so the first append
+    // replaces it instead of adding to it.
+    m_totalLength = isNull() ? string.length() : m_totalLength + string.length();
+
+    if (string.isEmpty())
+        return;
+    m_strings.append(string);
+}
+
+// Appends a single UTF-16 code unit as its own one-character segment.
+void StringBuilder::append(UChar c)
+{
+    // UINT_MAX in m_totalLength marks the null state, so the first append
+    // replaces it instead of adding to it.
+    m_totalLength = isNull() ? 1 : m_totalLength + 1;
+    m_strings.append(String(&c, 1));
+}
+
+// Appends a single 8-bit character as its own one-character segment.
+void StringBuilder::append(char c)
+{
+    // UINT_MAX in m_totalLength marks the null state, so the first append
+    // replaces it instead of adding to it.
+    m_totalLength = isNull() ? 1 : m_totalLength + 1;
+    m_strings.append(String(&c, 1));
+}
+
+// Concatenates every appended segment into a single String.
+// - A builder that is still null yields a null String.
+// - A non-null builder with no segments yields the shared empty string.
+// - A single segment is returned as-is without copying.
+// - Otherwise the segments are copied back to back into one buffer of
+//   m_totalLength code units, which the returned String adopts.
+String StringBuilder::toString() const
+{
+    if (isNull())
+        return String();
+
+    unsigned count = m_strings.size();
+
+    if (!count)
+        return String(StringImpl::empty());
+    if (count == 1)
+        return m_strings[0];
+
+    StringBuffer buffer(m_totalLength);
+
+    UChar* p = buffer.characters();
+    for (unsigned i = 0; i < count; ++i) {
+        StringImpl* string = m_strings[i].impl();
+        unsigned length = string->length();
+        // Derive the byte count from the character type instead of
+        // hard-coding a code-unit size of 2.
+        memcpy(p, string->characters(), length * sizeof(UChar));
+        p += length;
+    }
+
+    // Every code unit accounted for in m_totalLength must have been written.
+    ASSERT(p == m_totalLength + buffer.characters());
+
+    return String::adopt(buffer);
+}
+
+}
diff --git a/WebCore/platform/text/StringBuilder.h b/WebCore/platform/text/StringBuilder.h
new file mode 100644
index 0000000..8d76b9c
--- /dev/null
+++ b/WebCore/platform/text/StringBuilder.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef StringBuilder_h
+#define StringBuilder_h
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+ // Accumulates String segments and joins them on demand with toString().
+ // The builder starts out "null"; that state is encoded by m_totalLength
+ // holding UINT_MAX.
+ class StringBuilder {
+ public:
+ StringBuilder() : m_totalLength(UINT_MAX) {}
+
+ // Leaves the null state (total length becomes 0) without adding any text;
+ // does nothing if the builder is already non-null.
+ void setNonNull() { if (m_totalLength == UINT_MAX) m_totalLength = 0; }
+
+ void append(const String&);
+ void append(UChar);
+ void append(char);
+
+ // Returns the concatenation of everything appended so far.
+ String toString() const;
+
+ private:
+ bool isNull() const { return m_totalLength == UINT_MAX; }
+
+ // Sum of the lengths of all appended strings, or UINT_MAX while null.
+ unsigned m_totalLength;
+ // The appended segments, in append order.
+ Vector<String, 16> m_strings;
+ };
+
+}
+
+#endif
diff --git a/WebCore/platform/text/StringHash.h b/WebCore/platform/text/StringHash.h
index 375b2e4..c6e08a6 100644
--- a/WebCore/platform/text/StringHash.h
+++ b/WebCore/platform/text/StringHash.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006, 2007 Apple Inc. All rights reserved
+ * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -28,6 +28,10 @@
namespace WebCore {
+ // FIXME: We should really figure out a way to put the computeHash function that's
+ // currently a member function of StringImpl into this file so we can be a little
+ // closer to having all the nearly-identical hash functions in one place.
+
struct StringHash {
static unsigned hash(StringImpl* key) { return key->hash(); }
static bool equal(StringImpl* a, StringImpl* b)
@@ -78,17 +82,17 @@ namespace WebCore {
public:
// Paul Hsieh's SuperFastHash
// http://www.azillionmonkeys.com/qed/hash.html
- static unsigned hash(StringImpl* str)
+ static unsigned hash(const UChar* data, unsigned length)
{
- unsigned l = str->length();
- const UChar* s = str->characters();
+ unsigned l = length;
+ const UChar* s = data;
uint32_t hash = PHI;
uint32_t tmp;
int rem = l & 1;
l >>= 1;
- // Main loop
+ // Main loop.
for (; l > 0; l--) {
hash += WTF::Unicode::foldCase(s[0]);
tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
@@ -97,28 +101,32 @@ namespace WebCore {
hash += hash >> 11;
}
- // Handle end case
+ // Handle end case.
if (rem) {
hash += WTF::Unicode::foldCase(s[0]);
hash ^= hash << 11;
hash += hash >> 17;
}
- // Force "avalanching" of final 127 bits
+ // Force "avalanching" of final 127 bits.
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 2;
hash += hash >> 15;
hash ^= hash << 10;
- // this avoids ever returning a hash code of 0, since that is used to
+ // This avoids ever returning a hash code of 0, since that is used to
// signal "hash not computed yet", using a value that is likely to be
- // effectively the same as 0 when the low bits are masked
- if (hash == 0)
- hash = 0x80000000;
+ // effectively the same as 0 when the low bits are masked.
+ hash |= !hash << 31;
return hash;
}
+
+ static unsigned hash(StringImpl* str)
+ {
+ return hash(str->characters(), str->length());
+ }
static unsigned hash(const char* str, unsigned length)
{
@@ -160,8 +168,7 @@ namespace WebCore {
// this avoids ever returning a hash code of 0, since that is used to
// signal "hash not computed yet", using a value that is likely to be
// effectively the same as 0 when the low bits are masked
- if (hash == 0)
- hash = 0x80000000;
+ hash |= !hash << 31;
return hash;
}
@@ -200,48 +207,34 @@ namespace WebCore {
static const bool safeToCompareToEmptyOrDeleted = false;
};
+ // This hash can be used in cases where the key is a hash of a string, but we don't
+ // want to store the string. It's not really specific to string hashing, but all our
+ // current uses of it are for strings.
+ struct AlreadyHashed : IntHash<unsigned> {
+     // The key already is a hash value, so it is used unchanged.
+     static unsigned hash(unsigned key) { return key; }
+
+     // To use a hash value as a key for a hash table, we need to eliminate the
+     // "deleted" value, which is negative one. That could be done by changing
+     // the string hash function to never generate negative one, but this works
+     // and is still relatively efficient.
+     //
+     // |hash| must be non-zero. Every value other than 0xFFFFFFFF is returned
+     // unchanged; 0xFFFFFFFF is remapped to 0x7FFFFFFF.
+     static unsigned avoidDeletedValue(unsigned hash)
+     {
+         ASSERT(hash);
+         // (hash + 1) wraps to 0 only for 0xFFFFFFFF, so the mask below is
+         // 0x80000000 for that input and 0 otherwise. The bit has to be
+         // cleared: OR-ing it in would leave 0xFFFFFFFF untouched because the
+         // top bit is already set.
+         unsigned newHash = hash & ~(static_cast<unsigned>(!(hash + 1)) << 31);
+         ASSERT(newHash);
+         ASSERT(newHash != 0xFFFFFFFF);
+         return newHash;
+     }
+ };
+
}
namespace WTF {
- // store WebCore::String as StringImpl*
-
template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> {
- typedef HashTraits<WebCore::StringImpl*>::StorageTraits StorageTraits;
- typedef StorageTraits::TraitType StorageType;
static const bool emptyValueIsZero = true;
- static const bool needsRef = true;
-
- typedef union {
- WebCore::StringImpl* m_p;
- StorageType m_s;
- } UnionType;
-
- static void ref(const StorageType& s) { ref(reinterpret_cast<const UnionType*>(&s)->m_p); }
- static void deref(const StorageType& s) { deref(reinterpret_cast<const UnionType*>(&s)->m_p); }
-
- static void ref(const WebCore::StringImpl* str) { if (str) const_cast<WebCore::StringImpl*>(str)->ref(); }
- static void deref(const WebCore::StringImpl* str) { if (str) const_cast<WebCore::StringImpl*>(str)->deref(); }
- };
-
- // share code between StringImpl*, RefPtr<StringImpl>, and String
-
- template<> struct HashKeyStorageTraits<WebCore::StringHash, HashTraits<RefPtr<WebCore::StringImpl> > > {
- typedef WebCore::StringHash Hash;
- typedef HashTraits<WebCore::StringImpl*> Traits;
- };
- template<> struct HashKeyStorageTraits<WebCore::StringHash, HashTraits<WebCore::String> > {
- typedef WebCore::StringHash Hash;
- typedef HashTraits<WebCore::StringImpl*> Traits;
- };
-
- template<> struct HashKeyStorageTraits<WebCore::CaseFoldingHash, HashTraits<RefPtr<WebCore::StringImpl> > > {
- typedef WebCore::CaseFoldingHash Hash;
- typedef HashTraits<WebCore::StringImpl*> Traits;
- };
- template<> struct HashKeyStorageTraits<WebCore::CaseFoldingHash, HashTraits<WebCore::String> > {
- typedef WebCore::CaseFoldingHash Hash;
- typedef HashTraits<WebCore::StringImpl*> Traits;
+ static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); }
+ static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); }
};
}
diff --git a/WebCore/platform/text/StringImpl.cpp b/WebCore/platform/text/StringImpl.cpp
index 0643de6..911c0dc 100644
--- a/WebCore/platform/text/StringImpl.cpp
+++ b/WebCore/platform/text/StringImpl.cpp
@@ -28,24 +28,18 @@
#include "AtomicString.h"
#include "CString.h"
#include "CharacterNames.h"
-#include "DeprecatedString.h"
#include "FloatConversion.h"
-#include "Length.h"
#include "StringBuffer.h"
#include "StringHash.h"
#include "TextBreakIterator.h"
#include "TextEncoding.h"
#include <kjs/dtoa.h>
-#include <kjs/identifier.h>
#include <wtf/Assertions.h>
#include <wtf/unicode/Unicode.h>
using namespace WTF;
using namespace Unicode;
-using KJS::Identifier;
-using KJS::UString;
-
namespace WebCore {
static inline UChar* newUCharVector(unsigned n)
@@ -60,21 +54,23 @@ static inline void deleteUCharVector(const UChar* p)
// This constructor is used only to create the empty string.
StringImpl::StringImpl()
- : RefCounted<StringImpl>(1)
- , m_length(0)
+ : m_length(0)
, m_data(0)
, m_hash(0)
, m_inTable(false)
, m_hasTerminatingNullCharacter(false)
{
+ // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
+ // with impunity. The empty string is special because it is never entered into
+ // AtomicString's HashKey, but still needs to compare correctly.
+ hash();
}
// This is one of the most common constructors, but it's also used for the copy()
// operation. Because of that, it's the one constructor that doesn't assert the
// length is non-zero, since we support copying the empty string.
inline StringImpl::StringImpl(const UChar* characters, unsigned length)
- : RefCounted<StringImpl>(1)
- , m_length(length)
+ : m_length(length)
, m_hash(0)
, m_inTable(false)
, m_hasTerminatingNullCharacter(false)
@@ -85,8 +81,7 @@ inline StringImpl::StringImpl(const UChar* characters, unsigned length)
}
inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter)
- : RefCounted<StringImpl>(1)
- , m_length(str.m_length)
+ : m_length(str.m_length)
, m_hash(str.m_hash)
, m_inTable(false)
, m_hasTerminatingNullCharacter(true)
@@ -98,8 +93,7 @@ inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacte
}
inline StringImpl::StringImpl(const char* characters, unsigned length)
- : RefCounted<StringImpl>(1)
- , m_length(length)
+ : m_length(length)
, m_hash(0)
, m_inTable(false)
, m_hasTerminatingNullCharacter(false)
@@ -116,8 +110,7 @@ inline StringImpl::StringImpl(const char* characters, unsigned length)
}
inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
- : RefCounted<StringImpl>(1)
- , m_length(length)
+ : m_length(length)
, m_data(characters)
, m_hash(0)
, m_inTable(false)
@@ -127,15 +120,9 @@ inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
ASSERT(length);
}
-// FIXME: These AtomicString constructors return objects with a refCount of 0,
-// even though the others return objects with a refCount of 1. That preserves
-// the historical behavior for the hash map translator call sites inside the
-// AtomicString code, but is it correct?
-
// This constructor is only for use by AtomicString.
StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
- : RefCounted<StringImpl>(0)
- , m_length(length)
+ : m_length(length)
, m_hash(hash)
, m_inTable(true)
, m_hasTerminatingNullCharacter(false)
@@ -151,8 +138,7 @@ StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
// This constructor is only for use by AtomicString.
StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash)
- : RefCounted<StringImpl>(0)
- , m_length(length)
+ : m_length(length)
, m_hash(hash)
, m_inTable(true)
, m_hasTerminatingNullCharacter(false)
@@ -178,8 +164,8 @@ StringImpl::~StringImpl()
StringImpl* StringImpl::empty()
{
- static StringImpl e;
- return &e;
+ static StringImpl* e = new StringImpl;
+ return e;
}
bool StringImpl::containsOnlyWhitespace()
@@ -211,128 +197,6 @@ UChar32 StringImpl::characterStartingAt(unsigned i)
return 0;
}
-static Length parseLength(const UChar* data, unsigned length)
-{
- if (length == 0)
- return Length(1, Relative);
-
- unsigned i = 0;
- while (i < length && isSpaceOrNewline(data[i]))
- ++i;
- if (i < length && (data[i] == '+' || data[i] == '-'))
- ++i;
- while (i < length && Unicode::isDigit(data[i]))
- ++i;
-
- bool ok;
- int r = DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(data), i).string().toInt(&ok);
-
- /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
- while (i < length && (Unicode::isDigit(data[i]) || data[i] == '.'))
- ++i;
-
- /* IE Quirk: Skip any whitespace (20 % => 20%) */
- while (i < length && isSpaceOrNewline(data[i]))
- ++i;
-
- if (ok) {
- if (i < length) {
- UChar next = data[i];
- if (next == '%')
- return Length(static_cast<double>(r), Percent);
- if (next == '*')
- return Length(r, Relative);
- }
- return Length(r, Fixed);
- } else {
- if (i < length) {
- UChar next = data[i];
- if (next == '*')
- return Length(1, Relative);
- if (next == '%')
- return Length(1, Relative);
- }
- }
- return Length(0, Relative);
-}
-
-Length StringImpl::toLength()
-{
- return parseLength(m_data, m_length);
-}
-
-static int countCharacter(StringImpl* string, UChar character)
-{
- int count = 0;
- int length = string->length();
- for (int i = 0; i < length; ++i)
- count += (*string)[i] == character;
- return count;
-}
-
-Length* StringImpl::toCoordsArray(int& len)
-{
- StringBuffer spacified(m_length);
- for (unsigned i = 0; i < m_length; i++) {
- UChar cc = m_data[i];
- if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.'))
- spacified[i] = ' ';
- else
- spacified[i] = cc;
- }
- RefPtr<StringImpl> str = adopt(spacified);
-
- str = str->simplifyWhiteSpace();
-
- len = countCharacter(str.get(), ' ') + 1;
- Length* r = new Length[len];
-
- int i = 0;
- int pos = 0;
- int pos2;
-
- while ((pos2 = str->find(' ', pos)) != -1) {
- r[i++] = parseLength(str->characters() + pos, pos2 - pos);
- pos = pos2+1;
- }
- r[i] = parseLength(str->characters() + pos, str->length() - pos);
-
- ASSERT(i == len - 1);
-
- return r;
-}
-
-Length* StringImpl::toLengthArray(int& len)
-{
- RefPtr<StringImpl> str = simplifyWhiteSpace();
- if (!str->length()) {
- len = 1;
- return 0;
- }
-
- len = countCharacter(str.get(), ',') + 1;
- Length* r = new Length[len];
-
- int i = 0;
- int pos = 0;
- int pos2;
-
- while ((pos2 = str->find(',', pos)) != -1) {
- r[i++] = parseLength(str->characters() + pos, pos2 - pos);
- pos = pos2+1;
- }
-
- ASSERT(i == len - 1);
-
- /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
- if (str->length()-pos > 0)
- r[i] = parseLength(str->characters() + pos, str->length() - pos);
- else
- len--;
-
- return r;
-}
-
bool StringImpl::isLower()
{
// Do a faster loop for the case where all the characters are ASCII.
@@ -378,7 +242,7 @@ PassRefPtr<StringImpl> StringImpl::lower()
if (!error && realLength == length)
return adopt(data);
data.resize(realLength);
- Unicode::toLower(data.characters(), length, m_data, m_length, &error);
+ Unicode::toLower(data.characters(), realLength, m_data, m_length, &error);
if (error)
return this;
return adopt(data);
@@ -386,10 +250,26 @@ PassRefPtr<StringImpl> StringImpl::lower()
PassRefPtr<StringImpl> StringImpl::upper()
{
+ StringBuffer data(m_length);
+ int32_t length = m_length;
+
+ // Do a faster loop for the case where all the characters are ASCII.
+ UChar ored = 0;
+ for (int i = 0; i < length; i++) {
+ UChar c = m_data[i];
+ ored |= c;
+ data[i] = toASCIIUpper(c);
+ }
+ if (!(ored & ~0x7F))
+ return adopt(data);
+
+ // Do a slower implementation for cases that include non-ASCII characters.
bool error;
- int32_t length = Unicode::toUpper(0, 0, m_data, m_length, &error);
- StringBuffer data(length);
- Unicode::toUpper(data.characters(), length, m_data, m_length, &error);
+ int32_t realLength = Unicode::toUpper(data.characters(), length, m_data, m_length, &error);
+ if (!error && realLength == length)
+ return adopt(data);
+ data.resize(realLength);
+ Unicode::toUpper(data.characters(), realLength, m_data, m_length, &error);
if (error)
return this;
return adopt(data);
@@ -425,7 +305,7 @@ PassRefPtr<StringImpl> StringImpl::foldCase()
if (!error && realLength == length)
return adopt(data);
data.resize(realLength);
- Unicode::foldCase(data.characters(), length, m_data, m_length, &error);
+ Unicode::foldCase(data.characters(), realLength, m_data, m_length, &error);
if (error)
return this;
return adopt(data);
@@ -513,84 +393,54 @@ PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous)
return adopt(data);
}
-int StringImpl::toInt(bool* ok)
+int StringImpl::toIntStrict(bool* ok, int base)
{
- unsigned i = 0;
+ return charactersToIntStrict(m_data, m_length, ok, base);
+}
- // Allow leading spaces.
- for (; i != m_length; ++i)
- if (!isSpaceOrNewline(m_data[i]))
- break;
-
- // Allow sign.
- if (i != m_length && (m_data[i] == '+' || m_data[i] == '-'))
- ++i;
-
- // Allow digits.
- for (; i != m_length; ++i)
- if (!Unicode::isDigit(m_data[i]))
- break;
-
- return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt(ok);
+unsigned StringImpl::toUIntStrict(bool* ok, int base)
+{
+ return charactersToUIntStrict(m_data, m_length, ok, base);
}
-int64_t StringImpl::toInt64(bool* ok)
+int64_t StringImpl::toInt64Strict(bool* ok, int base)
{
- unsigned i = 0;
+ return charactersToInt64Strict(m_data, m_length, ok, base);
+}
- // Allow leading spaces.
- for (; i != m_length; ++i)
- if (!isSpaceOrNewline(m_data[i]))
- break;
-
- // Allow sign.
- if (i != m_length && (m_data[i] == '+' || m_data[i] == '-'))
- ++i;
-
- // Allow digits.
- for (; i != m_length; ++i)
- if (!Unicode::isDigit(m_data[i]))
- break;
-
- return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt64(ok);
+uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
+{
+ return charactersToUInt64Strict(m_data, m_length, ok, base);
}
-uint64_t StringImpl::toUInt64(bool* ok)
+int StringImpl::toInt(bool* ok)
{
- unsigned i = 0;
+ return charactersToInt(m_data, m_length, ok);
+}
- // Allow leading spaces.
- for (; i != m_length; ++i)
- if (!isSpaceOrNewline(m_data[i]))
- break;
+unsigned StringImpl::toUInt(bool* ok)
+{
+ return charactersToUInt(m_data, m_length, ok);
+}
- // Allow digits.
- for (; i != m_length; ++i)
- if (!Unicode::isDigit(m_data[i]))
- break;
-
- return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toUInt64(ok);
+int64_t StringImpl::toInt64(bool* ok)
+{
+ return charactersToInt64(m_data, m_length, ok);
+}
+
+uint64_t StringImpl::toUInt64(bool* ok)
+{
+ return charactersToUInt64(m_data, m_length, ok);
}
double StringImpl::toDouble(bool* ok)
{
- if (!m_length) {
- if (ok)
- *ok = false;
- return 0;
- }
- char *end;
- CString latin1String = Latin1Encoding().encode(characters(), length());
- double val = kjs_strtod(latin1String.data(), &end);
- if (ok)
- *ok = end == 0 || *end == '\0';
- return val;
+ return charactersToDouble(m_data, m_length, ok);
}
float StringImpl::toFloat(bool* ok)
{
- // FIXME: This will return ok even when the string fits into a double but not a float.
- return narrowPrecisionToFloat(toDouble(ok));
+ return charactersToFloat(m_data, m_length, ok);
}
static bool equal(const UChar* a, const char* b, int length)
@@ -657,15 +507,7 @@ int StringImpl::find(const char* chs, int index, bool caseSensitive)
int StringImpl::find(UChar c, int start)
{
- unsigned index = start;
- if (index >= m_length )
- return -1;
- while(index < m_length) {
- if (m_data[index] == c)
- return index;
- index++;
- }
- return -1;
+ return WebCore::find(m_data, m_length, c, start);
}
int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
@@ -726,18 +568,7 @@ int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
int StringImpl::reverseFind(UChar c, int index)
{
- if (index >= (int)m_length || m_length == 0)
- return -1;
-
- if (index < 0)
- index += m_length;
- while (1) {
- if (m_data[index] == c)
- return index;
- if (index == 0)
- return -1;
- index--;
- }
+ return WebCore::reverseFind(m_data, m_length, c, index);
}
int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
@@ -1032,20 +863,28 @@ PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* ch
ASSERT(characters);
ASSERT(length);
- StringBuffer strippedCopy(length);
+ // Optimize for the case where there are no Null characters by quickly
+ // searching for nulls, and then using StringImpl::create, which will
+ // memcpy the whole buffer. This is faster than assigning character by
+ // character during the loop.
+
+ // Fast case.
int foundNull = 0;
- for (unsigned i = 0; i < length; i++) {
+ for (unsigned i = 0; !foundNull && i < length; i++) {
int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
- strippedCopy[i] = c;
- foundNull |= ~c;
+ foundNull |= !c;
}
if (!foundNull)
- return adoptRef(new StringImpl(strippedCopy.release(), length, AdoptBuffer()));
+ return StringImpl::create(characters, length);
+
+ // Slow case.
+ StringBuffer strippedCopy(length);
unsigned strippedLength = 0;
for (unsigned i = 0; i < length; i++) {
if (int c = characters[i])
strippedCopy[strippedLength++] = c;
}
+ ASSERT(strippedLength < length); // Only take the slow case when stripping.
strippedCopy.shrink(strippedLength);
return adopt(strippedCopy);
}
diff --git a/WebCore/platform/text/StringImpl.h b/WebCore/platform/text/StringImpl.h
index dd50b2e..57f64c8 100644
--- a/WebCore/platform/text/StringImpl.h
+++ b/WebCore/platform/text/StringImpl.h
@@ -22,7 +22,6 @@
#ifndef StringImpl_h
#define StringImpl_h
-#include <kjs/identifier.h>
#include <limits.h>
#include <wtf/ASCIICType.h>
#include <wtf/Forward.h>
@@ -30,7 +29,7 @@
#include <wtf/Vector.h>
#include <wtf/unicode/Unicode.h>
-#if PLATFORM(CF)
+#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
typedef const struct __CFString * CFStringRef;
#endif
@@ -44,11 +43,15 @@ class AtomicString;
class StringBuffer;
struct CStringTranslator;
-struct Length;
+struct HashAndCharactersTranslator;
struct StringHash;
struct UCharBufferTranslator;
class StringImpl : public RefCounted<StringImpl> {
+ friend class AtomicString;
+ friend struct CStringTranslator;
+ friend struct HashAndCharactersTranslator;
+ friend struct UCharBufferTranslator;
private:
StringImpl();
StringImpl(const UChar*, unsigned length);
@@ -83,6 +86,7 @@ public:
bool hasTerminatingNullCharacter() { return m_hasTerminatingNullCharacter; }
unsigned hash() { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; }
+ unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
static unsigned computeHash(const UChar*, unsigned len);
static unsigned computeHash(const char*);
@@ -95,18 +99,21 @@ public:
UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
UChar32 characterStartingAt(unsigned);
- Length toLength();
-
bool containsOnlyWhitespace();
- int toInt(bool* ok = 0); // ignores trailing garbage, unlike DeprecatedString
- int64_t toInt64(bool* ok = 0); // ignores trailing garbage, unlike DeprecatedString
- uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage, unlike DeprecatedString
+ int toIntStrict(bool* ok = 0, int base = 10);
+ unsigned toUIntStrict(bool* ok = 0, int base = 10);
+ int64_t toInt64Strict(bool* ok = 0, int base = 10);
+ uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
+
+ int toInt(bool* ok = 0); // ignores trailing garbage
+ unsigned toUInt(bool* ok = 0); // ignores trailing garbage
+ int64_t toInt64(bool* ok = 0); // ignores trailing garbage
+ uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
+
double toDouble(bool* ok = 0);
float toFloat(bool* ok = 0);
- Length* toCoordsArray(int& len);
- Length* toLengthArray(int& len);
bool isLower();
PassRefPtr<StringImpl> lower();
PassRefPtr<StringImpl> upper();
@@ -138,7 +145,7 @@ public:
WTF::Unicode::Direction defaultWritingDirection();
-#if PLATFORM(CF)
+#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
CFStringRef createCFString();
#endif
#ifdef __OBJC__
@@ -146,10 +153,6 @@ public:
#endif
private:
- friend class AtomicString;
- friend struct UCharBufferTranslator;
- friend struct CStringTranslator;
-
unsigned m_length;
const UChar* m_data;
mutable unsigned m_hash;
diff --git a/WebCore/platform/text/TextBreakIteratorICU.cpp b/WebCore/platform/text/TextBreakIteratorICU.cpp
index 9fd2d0b..9941f58 100644
--- a/WebCore/platform/text/TextBreakIteratorICU.cpp
+++ b/WebCore/platform/text/TextBreakIteratorICU.cpp
@@ -25,6 +25,7 @@
#include "TextBreakIteratorInternalICU.h"
#include <unicode/ubrk.h>
+#include <wtf/Assertions.h>
namespace WebCore {
@@ -38,6 +39,7 @@ static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator
UErrorCode openStatus = U_ZERO_ERROR;
iterator = static_cast<TextBreakIterator*>(ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &openStatus));
createdIterator = true;
+ ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
}
if (!iterator)
return 0;
diff --git a/WebCore/platform/text/TextCodec.cpp b/WebCore/platform/text/TextCodec.cpp
index 1985c49..4222ee1 100644
--- a/WebCore/platform/text/TextCodec.cpp
+++ b/WebCore/platform/text/TextCodec.cpp
@@ -28,29 +28,31 @@
#include "TextCodec.h"
#include "PlatformString.h"
+#include <wtf/StringExtras.h>
namespace WebCore {
-const UChar BOM = 0xFEFF;
-
TextCodec::~TextCodec()
{
}
-// We strip BOM characters because they can show up both at the start of content
-// and inside content, and we never want them to end up in the decoded text.
-void TextCodec::appendOmittingBOM(Vector<UChar>& v, const UChar* characters, size_t length)
+int TextCodec::getUnencodableReplacement(unsigned codePoint, UnencodableHandling handling, UnencodableReplacementArray replacement)
{
- size_t start = 0;
- for (size_t i = 0; i != length; ++i) {
- if (BOM == characters[i]) {
- if (start != i)
- v.append(&characters[start], i - start);
- start = i + 1;
- }
+ switch (handling) {
+ case QuestionMarksForUnencodables:
+ replacement[0] = '?';
+ replacement[1] = 0;
+ return 1;
+ case EntitiesForUnencodables:
+ snprintf(replacement, sizeof(UnencodableReplacementArray), "&#%u;", codePoint);
+ return static_cast<int>(strlen(replacement));
+ case URLEncodedEntitiesForUnencodables:
+ snprintf(replacement, sizeof(UnencodableReplacementArray), "%%26%%23%u%%3B", codePoint);
+ return static_cast<int>(strlen(replacement));
}
- if (start != length)
- v.append(&characters[start], length - start);
+ ASSERT_NOT_REACHED();
+ replacement[0] = 0;
+ return 0;
}
} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodec.h b/WebCore/platform/text/TextCodec.h
index 77ffcf4..0a56262 100644
--- a/WebCore/platform/text/TextCodec.h
+++ b/WebCore/platform/text/TextCodec.h
@@ -32,21 +32,46 @@
#include <wtf/Vector.h>
#include <wtf/unicode/Unicode.h>
-namespace WebCore {
+#include "PlatformString.h"
- class CString;
- class String;
+namespace WebCore {
class TextEncoding;
+ // Specifies what will happen when a character is encountered that is
+ // not encodable in the character set.
+ enum UnencodableHandling {
+ // Substitutes the replacement character "?".
+ QuestionMarksForUnencodables,
+
+ // Encodes the character as an XML entity. For example, U+06DE
+ // would be "&#1758;" (0x6DE = 1758 in decimal).
+ EntitiesForUnencodables,
+
+ // Encodes the character as an entity as above, but with the
+ // non-alphanumeric characters escaped. This is used in URLs.
+ // For example, U+6DE would be "%26%231758%3B".
+ URLEncodedEntitiesForUnencodables,
+ };
+
+ typedef char UnencodableReplacementArray[32];
+
class TextCodec : Noncopyable {
public:
virtual ~TextCodec();
- virtual String decode(const char*, size_t length, bool flush = false) = 0;
- virtual CString encode(const UChar*, size_t length, bool allowEntities = false) = 0;
+ String decode(const char* str, size_t length, bool flush = false)
+ {
+ bool ignored;
+ return decode(str, length, flush, false, ignored);
+ }
+
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0;
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling) = 0;
- protected:
- static void appendOmittingBOM(Vector<UChar>&, const UChar*, size_t length);
+ // Fills a null-terminated string representation of the given
+ // unencodable character into the given replacement buffer.
+ // The length of the string (not including the null) will be returned.
+ static int getUnencodableReplacement(unsigned codePoint, UnencodableHandling, UnencodableReplacementArray);
};
typedef void (*EncodingNameRegistrar)(const char* alias, const char* name);
diff --git a/WebCore/platform/text/TextCodecICU.cpp b/WebCore/platform/text/TextCodecICU.cpp
index a89a74e..0a324a2 100644
--- a/WebCore/platform/text/TextCodecICU.cpp
+++ b/WebCore/platform/text/TextCodecICU.cpp
@@ -33,7 +33,7 @@
#include <unicode/ucnv.h>
#include <unicode/ucnv_cb.h>
#include <wtf/Assertions.h>
-#include <wtf/HashMap.h>
+#include <wtf/StringExtras.h>
using std::auto_ptr;
using std::min;
@@ -41,7 +41,7 @@ using std::min;
namespace WebCore {
const size_t ConversionBufferSize = 16384;
-
+
static UConverter* cachedConverterICU;
static auto_ptr<TextCodec> newTextCodecICU(const TextEncoding& encoding, const void*)
@@ -60,8 +60,7 @@ void TextCodecICU::registerBaseCodecs(TextCodecRegistrar registrar)
}
// FIXME: Registering all the encodings we get from ucnv_getAvailableName
-// includes encodings we don't want or need. For example: UTF16_PlatformEndian,
-// UTF16_OppositeEndian, UTF32_PlatformEndian, UTF32_OppositeEndian, and all
+// includes encodings we don't want or need. For example, all
// the encodings with commas and version numbers.
void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
@@ -69,27 +68,41 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar
// We register Hebrew with logical ordering using a separate name.
// Otherwise, this would share the same canonical name as the
// visual ordering case, and then TextEncoding could not tell them
- // apart; ICU works with either name.
+ // apart; ICU treats these names as synonyms.
registrar("ISO-8859-8-I", "ISO-8859-8-I");
int32_t numEncodings = ucnv_countAvailable();
for (int32_t i = 0; i < numEncodings; ++i) {
const char* name = ucnv_getAvailableName(i);
UErrorCode error = U_ZERO_ERROR;
- // FIXME: Should we use the "MIME" standard instead of "IANA"?
- const char* standardName = ucnv_getStandardName(name, "IANA", &error);
- if (!U_SUCCESS(error) || !standardName)
- continue;
+ // Try MIME before trying IANA to pick up commonly used names like
+ // 'EUC-JP' instead of horrendously long names like
+ // 'Extended_UNIX_Code_Packed_Format_for_Japanese'.
+ const char* standardName = ucnv_getStandardName(name, "MIME", &error);
+ if (!U_SUCCESS(error) || !standardName) {
+ error = U_ZERO_ERROR;
+ // Try IANA to pick up 'windows-12xx' and other names
+ // which are not preferred MIME names but are widely used.
+ standardName = ucnv_getStandardName(name, "IANA", &error);
+ if (!U_SUCCESS(error) || !standardName)
+ continue;
+ }
// 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
// 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
// for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312-80") == 0)
standardName = "GBK";
-#ifndef ANDROID
- else
-#endif
- registrar(standardName, standardName);
+ // Similarly, EUC-KR encodings all map to an extended version.
+ else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0)
+ standardName = "windows-949-2000";
+ // And so on.
+ else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6.
+ standardName = "windows-1254";
+ else if (strcmp(standardName, "TIS-620") == 0)
+ standardName = "windows-874-2000";
+
+ registrar(standardName, standardName);
uint16_t numAliases = ucnv_countAliases(name, &error);
ASSERT(U_SUCCESS(error));
@@ -104,21 +117,25 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar
}
// Additional aliases.
- // Perhaps we can get these added to ICU.
+ // These are present in modern versions of ICU, but not in ICU 3.2 (shipped with Mac OS X 10.4).
registrar("macroman", "macintosh");
- registrar("xmacroman", "macintosh");
+#ifndef ANDROID // Android does not have x-mac-cyrillic in its ICU library
+ registrar("maccyrillic", "x-mac-cyrillic");
+#endif
// Additional aliases that historically were present in the encoding
// table in WebKit on Macintosh that don't seem to be present in ICU.
// Perhaps we can prove these are not used on the web and remove them.
// Or perhaps we can get them added to ICU.
+ registrar("xmacroman", "macintosh");
+#ifndef ANDROID // Android does not have x-mac-cyrillic in its ICU library
+ registrar("xmacukrainian", "x-mac-cyrillic");
+#endif
registrar("cnbig5", "Big5");
registrar("cngb", "EUC-CN");
registrar("csISO88598I", "ISO_8859-8-I");
registrar("csgb231280", "EUC-CN");
- registrar("dos720", "cp864");
registrar("dos874", "cp874");
- registrar("jis7", "ISO-2022-JP");
registrar("koi", "KOI8-R");
registrar("logical", "ISO-8859-8-I");
registrar("unicode11utf8", "UTF-8");
@@ -127,7 +144,7 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar
registrar("winarabic", "windows-1256");
registrar("winbaltic", "windows-1257");
registrar("wincyrillic", "windows-1251");
- registrar("windows874", "cp874");
+ registrar("iso885911", "windows874-2000");
registrar("wingreek", "windows-1253");
registrar("winhebrew", "windows-1255");
registrar("winlatin2", "windows-1250");
@@ -139,7 +156,17 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar
registrar("xeuccn", "EUC-CN");
registrar("xgbk", "EUC-CN");
registrar("xunicode20utf8", "UTF-8");
+ registrar("xwindows949", "windows-949-2000");
registrar("xxbig5", "Big5");
+
+ // This alias is present in modern versions of ICU, but it has no standard name,
+ // so we give one to it manually. It is not present in ICU 3.2.
+ registrar("windows874", "windows874-2000");
+
+ // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names.
+ // They are not present in ICU 3.2.
+ registrar("dos720", "cp864");
+ registrar("jis7", "ISO-2022-JP");
}
void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar)
@@ -151,10 +178,13 @@ void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar)
for (int32_t i = 0; i < numEncodings; ++i) {
const char* name = ucnv_getAvailableName(i);
UErrorCode error = U_ZERO_ERROR;
- // FIXME: Should we use the "MIME" standard instead of "IANA"?
- const char* standardName = ucnv_getStandardName(name, "IANA", &error);
- if (!U_SUCCESS(error) || !standardName)
- continue;
+ const char* standardName = ucnv_getStandardName(name, "MIME", &error);
+ if (!U_SUCCESS(error) || !standardName) {
+ error = U_ZERO_ERROR;
+ standardName = ucnv_getStandardName(name, "IANA", &error);
+ if (!U_SUCCESS(error) || !standardName)
+ continue;
+ }
registrar(standardName, newTextCodecICU, 0);
}
}
@@ -211,7 +241,50 @@ void TextCodecICU::createICUConverter() const
ucnv_setFallback(m_converterICU, TRUE);
}
-String TextCodecICU::decode(const char* bytes, size_t length, bool flush)
+int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err)
+{
+ UChar* targetStart = target;
+ err = U_ZERO_ERROR;
+ ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
+ return target - targetStart;
+}
+
+class ErrorCallbackSetter {
+public:
+ ErrorCallbackSetter(UConverter* converter, bool stopOnError)
+ : m_converter(converter)
+ , m_shouldStopOnEncodingErrors(stopOnError)
+ {
+ if (m_shouldStopOnEncodingErrors) {
+ UErrorCode err = U_ZERO_ERROR;
+ ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,
+ UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,
+ &m_savedContext, &err);
+ ASSERT(err == U_ZERO_ERROR);
+ }
+ }
+ ~ErrorCallbackSetter()
+ {
+ if (m_shouldStopOnEncodingErrors) {
+ UErrorCode err = U_ZERO_ERROR;
+ const void* oldContext;
+ UConverterToUCallback oldAction;
+ ucnv_setToUCallBack(m_converter, m_savedAction,
+ m_savedContext, &oldAction,
+ &oldContext, &err);
+ ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);
+ ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_ON_ILLEGAL));
+ ASSERT(err == U_ZERO_ERROR);
+ }
+ }
+private:
+ UConverter* m_converter;
+ bool m_shouldStopOnEncodingErrors;
+ const void* m_savedContext;
+ UConverterToUCallback m_savedAction;
+};
+
+String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
{
// Get a converter for the passed-in encoding.
if (!m_converterICU) {
@@ -222,34 +295,29 @@ String TextCodecICU::decode(const char* bytes, size_t length, bool flush)
return String();
}
}
+
+ ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);
Vector<UChar> result;
UChar buffer[ConversionBufferSize];
+ UChar* bufferLimit = buffer + ConversionBufferSize;
const char* source = reinterpret_cast<const char*>(bytes);
const char* sourceLimit = source + length;
int32_t* offsets = NULL;
- UErrorCode err;
+ UErrorCode err = U_ZERO_ERROR;
do {
- UChar* target = buffer;
- const UChar* targetLimit = target + ConversionBufferSize;
- err = U_ZERO_ERROR;
- ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
- int count = target - buffer;
- appendOmittingBOM(result, reinterpret_cast<const UChar*>(buffer), count);
+ int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, flush, err);
+ result.append(buffer, ucharsDecoded);
} while (err == U_BUFFER_OVERFLOW_ERROR);
if (U_FAILURE(err)) {
// flush the converter so it can be reused, and not be bothered by this error.
do {
- UChar *target = buffer;
- const UChar *targetLimit = target + ConversionBufferSize;
- err = U_ZERO_ERROR;
- ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, true, &err);
+ decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err);
} while (source < sourceLimit);
- LOG_ERROR("ICU conversion error");
- return String();
+ sawError = true;
}
String resultString = String::adopt(result);
@@ -265,23 +333,43 @@ String TextCodecICU::decode(const char* bytes, size_t length, bool flush)
// We need to apply these fallbacks ourselves as they are not currently supported by ICU and
// they were provided by the old TEC encoding path
// Needed to fix <rdar://problem/4708689>
-static HashMap<UChar32, UChar>& gbkEscapes() {
- static HashMap<UChar32, UChar> escapes;
- if (escapes.isEmpty()) {
- escapes.add(0x01F9, 0xE7C8);
- escapes.add(0x1E3F, 0xE7C7);
- escapes.add(0x22EF, 0x2026);
- escapes.add(0x301C, 0xFF5E);
+static UChar getGbkEscape(UChar32 codePoint)
+{
+ switch (codePoint) {
+ case 0x01F9:
+ return 0xE7C8;
+ case 0x1E3F:
+ return 0xE7C7;
+ case 0x22EF:
+ return 0x2026;
+ case 0x301C:
+ return 0xFF5E;
+ default:
+ return 0;
}
-
- return escapes;
}
+// Invalid character handler when writing escaped entities for unrepresentable
+// characters. See the declaration of TextCodec::encode for more.
+static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
+ UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+{
+ if (reason == UCNV_UNASSIGNED) {
+ *err = U_ZERO_ERROR;
+
+ UnencodableReplacementArray entity;
+ int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncodedEntitiesForUnencodables, entity);
+ ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err);
+ } else
+ UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+}
+
+// Substitutes special GBK characters, escaping all other unassigned entities.
static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
{
- if (codePoint && gbkEscapes().contains(codePoint)) {
- UChar outChar = gbkEscapes().get(codePoint);
+ UChar outChar;
+ if (reason == UCNV_UNASSIGNED && (outChar = getGbkEscape(codePoint))) {
const UChar* source = &outChar;
*err = U_ZERO_ERROR;
ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
@@ -290,11 +378,28 @@ static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr
UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
}
+// Combines both urlEscapedEntityCallback and GBK character substitution.
+static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
+ UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+{
+ if (reason == UCNV_UNASSIGNED) {
+ if (UChar outChar = getGbkEscape(codePoint)) {
+ const UChar* source = &outChar;
+ *err = U_ZERO_ERROR;
+ ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
+ return;
+ }
+ urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+ return;
+ }
+ UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+}
+
static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
{
- if (gbkEscapes().contains(codePoint)) {
- UChar outChar = gbkEscapes().get(codePoint);
+ UChar outChar;
+ if (reason == UCNV_UNASSIGNED && (outChar = getGbkEscape(codePoint))) {
const UChar* source = &outChar;
*err = U_ZERO_ERROR;
ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
@@ -303,7 +408,7 @@ static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs
UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
}
-CString TextCodecICU::encode(const UChar* characters, size_t length, bool allowEntities)
+CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
if (!length)
return "";
@@ -321,14 +426,20 @@ CString TextCodecICU::encode(const UChar* characters, size_t length, bool allowE
const UChar* source = copy.characters();
const UChar* sourceLimit = source + copy.length();
-
+
UErrorCode err = U_ZERO_ERROR;
- if (allowEntities)
- ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
- else {
- ucnv_setSubstChars(m_converterICU, "?", 1, &err);
- ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
+ switch (handling) {
+ case QuestionMarksForUnencodables:
+ ucnv_setSubstChars(m_converterICU, "?", 1, &err);
+ ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
+ break;
+ case EntitiesForUnencodables:
+ ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
+ break;
+ case URLEncodedEntitiesForUnencodables:
+ ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
+ break;
}
ASSERT(U_SUCCESS(err));
diff --git a/WebCore/platform/text/TextCodecICU.h b/WebCore/platform/text/TextCodecICU.h
index c2a30b1..9c9a4a7b 100644
--- a/WebCore/platform/text/TextCodecICU.h
+++ b/WebCore/platform/text/TextCodecICU.h
@@ -45,18 +45,21 @@ namespace WebCore {
TextCodecICU(const TextEncoding&);
virtual ~TextCodecICU();
- virtual String decode(const char*, size_t length, bool flush = false);
- virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling);
private:
void createICUConverter() const;
void releaseICUConverter() const;
bool needsGBKFallbacks() const { return m_needsGBKFallbacks; }
- void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; }
+ void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; }
+
+ int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source,
+ const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err);
TextEncoding m_encoding;
unsigned m_numBufferedBytes;
- unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+ unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
mutable UConverter* m_converterICU;
mutable bool m_needsGBKFallbacks;
};
diff --git a/WebCore/platform/text/TextCodecLatin1.cpp b/WebCore/platform/text/TextCodecLatin1.cpp
index a687235..50f9f97 100644
--- a/WebCore/platform/text/TextCodecLatin1.cpp
+++ b/WebCore/platform/text/TextCodecLatin1.cpp
@@ -29,6 +29,7 @@
#include "CString.h"
#include "PlatformString.h"
#include "StringBuffer.h"
+#include <stdio.h>
using std::auto_ptr;
@@ -117,7 +118,7 @@ void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar)
registrar("US-ASCII", newStreamingTextDecoderWindowsLatin1, 0);
}
-String TextCodecLatin1::decode(const char* bytes, size_t length, bool)
+String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
{
StringBuffer characters(length);
@@ -141,7 +142,7 @@ String TextCodecLatin1::decode(const char* bytes, size_t length, bool)
return String::adopt(characters);
}
-static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, bool allowEntities)
+static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, UnencodableHandling handling)
{
Vector<char> result(length);
char* bytes = result.data();
@@ -158,17 +159,13 @@ static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length
if (table[b] == c)
goto gotByte;
// No way to encode this character with Windows Latin-1.
- if (allowEntities) {
- char entityBuffer[16];
- sprintf(entityBuffer, "&#%u;", c);
- size_t entityLength = strlen(entityBuffer);
- result.grow(resultLength + entityLength + length - i);
- bytes = result.data();
- memcpy(bytes + resultLength, entityBuffer, entityLength);
- resultLength += entityLength;
- continue;
- }
- b = '?';
+ UnencodableReplacementArray replacement;
+ int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement);
+ result.grow(resultLength + replacementLength + length - i);
+ bytes = result.data();
+ memcpy(bytes + resultLength, replacement, replacementLength);
+ resultLength += replacementLength;
+ continue;
}
gotByte:
bytes[resultLength++] = b;
@@ -177,7 +174,7 @@ static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length
return CString(bytes, resultLength);
}
-CString TextCodecLatin1::encode(const UChar* characters, size_t length, bool allowEntities)
+CString TextCodecLatin1::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
{
char* bytes;
@@ -196,7 +193,7 @@ CString TextCodecLatin1::encode(const UChar* characters, size_t length, bool all
}
// If it wasn't all ASCII, call the function that handles more-complex cases.
- return encodeComplexWindowsLatin1(characters, length, allowEntities);
+ return encodeComplexWindowsLatin1(characters, length, handling);
}
} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecLatin1.h b/WebCore/platform/text/TextCodecLatin1.h
index 46d6e66..f035d01 100644
--- a/WebCore/platform/text/TextCodecLatin1.h
+++ b/WebCore/platform/text/TextCodecLatin1.h
@@ -35,8 +35,8 @@ namespace WebCore {
static void registerEncodingNames(EncodingNameRegistrar);
static void registerCodecs(TextCodecRegistrar);
- virtual String decode(const char*, size_t length, bool flush = false);
- virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling);
};
} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecUTF16.cpp b/WebCore/platform/text/TextCodecUTF16.cpp
index 9ecd2a9..88e4e73 100644
--- a/WebCore/platform/text/TextCodecUTF16.cpp
+++ b/WebCore/platform/text/TextCodecUTF16.cpp
@@ -34,8 +34,6 @@ using std::auto_ptr;
namespace WebCore {
-const UChar BOM = 0xFEFF;
-
void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
{
registrar("UTF-16LE", "UTF-16LE");
@@ -67,7 +65,7 @@ void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0);
}
-String TextCodecUTF16::decode(const char* bytes, size_t length, bool)
+String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool stopOnError, bool& sawError)
{
if (!length)
return String();
@@ -85,8 +83,7 @@ String TextCodecUTF16::decode(const char* bytes, size_t length, bool)
c = m_bufferedByte | (p[0] << 8);
else
c = (m_bufferedByte << 8) | p[0];
- if (c != BOM)
- *q++ = c;
+ *q++ = c;
m_haveBufferedByte = false;
p += 1;
numChars -= 1;
@@ -96,15 +93,13 @@ String TextCodecUTF16::decode(const char* bytes, size_t length, bool)
for (size_t i = 0; i < numChars; ++i) {
UChar c = p[0] | (p[1] << 8);
p += 2;
- if (c != BOM)
- *q++ = c;
+ *q++ = c;
}
else
for (size_t i = 0; i < numChars; ++i) {
UChar c = (p[0] << 8) | p[1];
p += 2;
- if (c != BOM)
- *q++ = c;
+ *q++ = c;
}
if (numBytes & 1) {
@@ -118,7 +113,7 @@ String TextCodecUTF16::decode(const char* bytes, size_t length, bool)
return String::adopt(buffer);
}
-CString TextCodecUTF16::encode(const UChar* characters, size_t length, bool)
+CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling)
{
char* bytes;
CString string = CString::newUninitialized(length * 2, bytes);
diff --git a/WebCore/platform/text/TextCodecUTF16.h b/WebCore/platform/text/TextCodecUTF16.h
index 2bde221..8ce9476 100644
--- a/WebCore/platform/text/TextCodecUTF16.h
+++ b/WebCore/platform/text/TextCodecUTF16.h
@@ -37,8 +37,8 @@ namespace WebCore {
TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { }
- virtual String decode(const char*, size_t length, bool flush = false);
- virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling);
private:
bool m_littleEndian;
diff --git a/WebCore/platform/text/TextCodecUserDefined.cpp b/WebCore/platform/text/TextCodecUserDefined.cpp
index a420992..2dae0f3 100644
--- a/WebCore/platform/text/TextCodecUserDefined.cpp
+++ b/WebCore/platform/text/TextCodecUserDefined.cpp
@@ -29,6 +29,7 @@
#include "CString.h"
#include "PlatformString.h"
#include "StringBuffer.h"
+#include <stdio.h>
using std::auto_ptr;
@@ -49,7 +50,7 @@ void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar)
registrar("x-user-defined", newStreamingTextDecoderUserDefined, 0);
}
-String TextCodecUserDefined::decode(const char* bytes, size_t length, bool)
+String TextCodecUserDefined::decode(const char* bytes, size_t length, bool, bool, bool&)
{
StringBuffer buffer(length);
@@ -61,7 +62,7 @@ String TextCodecUserDefined::decode(const char* bytes, size_t length, bool)
return String::adopt(buffer);
}
-static CString encodeComplexUserDefined(const UChar* characters, size_t length, bool allowEntities)
+static CString encodeComplexUserDefined(const UChar* characters, size_t length, UnencodableHandling handling)
{
Vector<char> result(length);
char* bytes = result.data();
@@ -71,27 +72,23 @@ static CString encodeComplexUserDefined(const UChar* characters, size_t length,
UChar32 c;
U16_NEXT(characters, i, length, c);
signed char signedByte = c;
- if ((signedByte & 0xf7ff) == c)
+ if ((signedByte & 0xF7FF) == c)
bytes[resultLength++] = signedByte;
else {
// No way to encode this character with x-user-defined.
- if (allowEntities) {
- char entityBuffer[16];
- sprintf(entityBuffer, "&#%u;", c);
- size_t entityLength = strlen(entityBuffer);
- result.grow(resultLength + entityLength + length - i);
- bytes = result.data();
- memcpy(bytes + resultLength, entityBuffer, entityLength);
- resultLength += entityLength;
- } else
- bytes[resultLength++] = '?';
+ UnencodableReplacementArray replacement;
+ int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement);
+ result.grow(resultLength + replacementLength + length - i);
+ bytes = result.data();
+ memcpy(bytes + resultLength, replacement, replacementLength);
+ resultLength += replacementLength;
}
}
return CString(bytes, resultLength);
}
-CString TextCodecUserDefined::encode(const UChar* characters, size_t length, bool allowEntities)
+CString TextCodecUserDefined::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
char* bytes;
CString string = CString::newUninitialized(length, bytes);
@@ -108,7 +105,7 @@ CString TextCodecUserDefined::encode(const UChar* characters, size_t length, boo
return string;
// If it wasn't all ASCII, call the function that handles more-complex cases.
- return encodeComplexUserDefined(characters, length, allowEntities);
+ return encodeComplexUserDefined(characters, length, handling);
}
} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecUserDefined.h b/WebCore/platform/text/TextCodecUserDefined.h
index 4fba907..d1b3160 100644
--- a/WebCore/platform/text/TextCodecUserDefined.h
+++ b/WebCore/platform/text/TextCodecUserDefined.h
@@ -35,8 +35,8 @@ namespace WebCore {
static void registerEncodingNames(EncodingNameRegistrar);
static void registerCodecs(TextCodecRegistrar);
- virtual String decode(const char*, size_t length, bool flush = false);
- virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling);
};
} // namespace WebCore
diff --git a/WebCore/platform/text/TextDecoder.cpp b/WebCore/platform/text/TextDecoder.cpp
index 8633e9f..e39a6b7 100644
--- a/WebCore/platform/text/TextDecoder.cpp
+++ b/WebCore/platform/text/TextDecoder.cpp
@@ -47,8 +47,10 @@ void TextDecoder::reset(const TextEncoding& encoding)
m_numBufferedBytes = 0;
}
-String TextDecoder::checkForBOM(const char* data, size_t length, bool flush)
+String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
{
+ ASSERT(!m_checkedForBOM);
+
// Check to see if we found a BOM.
size_t numBufferedBytes = m_numBufferedBytes;
size_t buf1Len = numBufferedBytes;
@@ -62,22 +64,28 @@ String TextDecoder::checkForBOM(const char* data, size_t length, bool flush)
const TextEncoding* encodingConsideringBOM = &m_encoding;
bool foundBOM = true;
+ size_t lengthOfBOM = 0;
if (c1 == 0xFF && c2 == 0xFE) {
- if (c3 != 0 || c4 != 0)
+ if (c3 != 0 || c4 != 0) {
encodingConsideringBOM = &UTF16LittleEndianEncoding();
- else if (numBufferedBytes + length > sizeof(m_bufferedBytes))
+ lengthOfBOM = 2;
+ } else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) {
encodingConsideringBOM = &UTF32LittleEndianEncoding();
- else
+ lengthOfBOM = 4;
+ } else
foundBOM = false;
- }
- else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF)
+ } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
encodingConsideringBOM = &UTF8Encoding();
- else if (c1 == 0xFE && c2 == 0xFF)
+ lengthOfBOM = 3;
+ } else if (c1 == 0xFE && c2 == 0xFF) {
encodingConsideringBOM = &UTF16BigEndianEncoding();
- else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF)
+ lengthOfBOM = 2;
+ } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) {
encodingConsideringBOM = &UTF32BigEndianEncoding();
- else
+ lengthOfBOM = 4;
+ } else
foundBOM = false;
+
if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
// Continue to look for the BOM.
memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
@@ -91,16 +99,31 @@ String TextDecoder::checkForBOM(const char* data, size_t length, bool flush)
return String();
m_checkedForBOM = true;
+ // Skip the BOM.
+ if (foundBOM) {
+ ASSERT(numBufferedBytes < lengthOfBOM);
+ size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes;
+ ASSERT(numUnbufferedBOMBytes <= length);
+
+ data += numUnbufferedBOMBytes;
+ length -= numUnbufferedBOMBytes;
+ numBufferedBytes = 0;
+ m_numBufferedBytes = 0;
+ }
+
// Handle case where we have some buffered bytes to deal with.
if (numBufferedBytes) {
char bufferedBytes[sizeof(m_bufferedBytes)];
memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
m_numBufferedBytes = 0;
- return m_codec->decode(bufferedBytes, numBufferedBytes, false)
- + m_codec->decode(data, length, flush);
+
+ String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError);
+ if (stopOnError && sawError)
+ return bufferedResult;
+ return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError);
}
- return m_codec->decode(data, length, flush);
+ return m_codec->decode(data, length, flush, stopOnError, sawError);
}
} // namespace WebCore
diff --git a/WebCore/platform/text/TextDecoder.h b/WebCore/platform/text/TextDecoder.h
index 3892032..171cb59 100644
--- a/WebCore/platform/text/TextDecoder.h
+++ b/WebCore/platform/text/TextDecoder.h
@@ -41,15 +41,15 @@ namespace WebCore {
void reset(const TextEncoding&);
const TextEncoding& encoding() const { return m_encoding; };
- String decode(const char* data, size_t length, bool flush = false)
+ String decode(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
{
if (!m_checkedForBOM)
- return checkForBOM(data, length, flush);
- return m_codec->decode(data, length, flush);
+ return checkForBOM(data, length, flush, stopOnError, sawError);
+ return m_codec->decode(data, length, flush, stopOnError, sawError);
}
private:
- String checkForBOM(const char*, size_t length, bool flush);
+ String checkForBOM(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
TextEncoding m_encoding;
OwnPtr<TextCodec> m_codec;
diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp
index c7676e9..9026049 100644
--- a/WebCore/platform/text/TextEncoding.cpp
+++ b/WebCore/platform/text/TextEncoding.cpp
@@ -59,15 +59,15 @@ TextEncoding::TextEncoding(const String& name)
{
}
-String TextEncoding::decode(const char* data, size_t length) const
+String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const
{
if (!m_name)
return String();
- return TextDecoder(*this).decode(data, length, true);
+ return TextDecoder(*this).decode(data, length, true, stopOnError, sawError);
}
-CString TextEncoding::encode(const UChar* characters, size_t length, bool allowEntities) const
+CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const
{
if (!m_name)
return CString();
@@ -100,11 +100,11 @@ CString TextEncoding::encode(const UChar* characters, size_t length, bool allowE
source = normalizedCharacters.data();
sourceLength = normalizedLength;
}
- return newTextCodec(*this)->encode(source, sourceLength, allowEntities);
+ return newTextCodec(*this)->encode(source, sourceLength, handling);
#elif USE(QT4_UNICODE)
QString str(reinterpret_cast<const QChar*>(characters), length);
str = str.normalized(QString::NormalizationForm_C);
- return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), allowEntities);
+ return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling);
#endif
}
diff --git a/WebCore/platform/text/TextEncoding.h b/WebCore/platform/text/TextEncoding.h
index 59d225c..0a0ab8c 100644
--- a/WebCore/platform/text/TextEncoding.h
+++ b/WebCore/platform/text/TextEncoding.h
@@ -26,6 +26,7 @@
#ifndef TextEncoding_h
#define TextEncoding_h
+#include "TextCodec.h"
#include <wtf/unicode/Unicode.h>
namespace WebCore {
@@ -46,8 +47,13 @@ namespace WebCore {
UChar backslashAsCurrencySymbol() const;
const TextEncoding& closest8BitEquivalent() const;
- String decode(const char*, size_t length) const;
- CString encode(const UChar*, size_t length, bool allowEntities = false) const;
+ String decode(const char* str, size_t length) const
+ {
+ bool ignored;
+ return decode(str, length, false, ignored);
+ }
+ String decode(const char*, size_t length, bool stopOnError, bool& sawError) const;
+ CString encode(const UChar*, size_t length, UnencodableHandling) const;
private:
const char* m_name;
diff --git a/WebCore/platform/text/TextEncodingRegistry.cpp b/WebCore/platform/text/TextEncodingRegistry.cpp
index a7ad879..3f1f078 100644
--- a/WebCore/platform/text/TextEncodingRegistry.cpp
+++ b/WebCore/platform/text/TextEncodingRegistry.cpp
@@ -33,6 +33,7 @@
#include <wtf/ASCIICType.h>
#include <wtf/Assertions.h>
#include <wtf/HashMap.h>
+#include <wtf/StringExtras.h>
#if USE(ICU_UNICODE)
#include "TextCodecICU.h"
@@ -132,7 +133,7 @@ static void checkExistingName(const char* alias, const char* atomicName)
// Keep the warning silent about one case where we know this will happen.
if (strcmp(alias, "ISO-8859-8-I") == 0
&& strcmp(oldAtomicName, "ISO-8859-8-I") == 0
- && strcmp(atomicName, "ISO_8859-8:1988") == 0)
+ && strcasecmp(atomicName, "iso-8859-8") == 0)
return;
LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s",
alias, oldAtomicName, atomicName);
diff --git a/WebCore/platform/text/TextStream.cpp b/WebCore/platform/text/TextStream.cpp
index b23e769..5b7a0c7 100644
--- a/WebCore/platform/text/TextStream.cpp
+++ b/WebCore/platform/text/TextStream.cpp
@@ -26,145 +26,89 @@
#include "config.h"
#include "TextStream.h"
-#include "DeprecatedString.h"
-#include "Logging.h"
#include "PlatformString.h"
-#include <wtf/Vector.h>
+#include <wtf/StringExtras.h>
namespace WebCore {
-const size_t integerOrPointerAsStringBufferSize = 100; // large enough for any integer or pointer in string format, including trailing null character
-const char* const precisionFormats[7] = { "%.0f", "%.1f", "%.2f", "%.3f", "%.4f", "%.5f", "%.6f"};
-const int maxPrecision = 6; // must match size of precisionFormats
-const int defaultPrecision = 6; // matches qt and sprintf(.., "%f", ...) behaviour
-
-TextStream::TextStream(DeprecatedString* s)
- : m_hasByteArray(false), m_string(s), m_precision(defaultPrecision)
-{
-}
-
-TextStream& TextStream::operator<<(char c)
-{
- if (m_hasByteArray)
- m_byteArray.append(c);
-
- if (m_string)
- m_string->append(DeprecatedChar(c));
- return *this;
-}
-
-TextStream& TextStream::operator<<(short i)
-{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, "%d", i);
- return *this << buffer;
-}
-
-TextStream& TextStream::operator<<(unsigned short i)
-{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, "%u", i);
- return *this << buffer;
-}
+static const size_t printBufferSize = 100; // large enough for any integer or floating point value in string format, including trailing null character
TextStream& TextStream::operator<<(int i)
{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, "%d", i);
+ char buffer[printBufferSize];
+ snprintf(buffer, sizeof(buffer) - 1, "%d", i);
return *this << buffer;
}
TextStream& TextStream::operator<<(unsigned i)
{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, "%u", i);
+ char buffer[printBufferSize];
+ snprintf(buffer, sizeof(buffer) - 1, "%u", i);
return *this << buffer;
}
TextStream& TextStream::operator<<(long i)
{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, "%ld", i);
+ char buffer[printBufferSize];
+ snprintf(buffer, sizeof(buffer) - 1, "%ld", i);
return *this << buffer;
}
TextStream& TextStream::operator<<(unsigned long i)
{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, "%lu", i);
+ char buffer[printBufferSize];
+ snprintf(buffer, sizeof(buffer) - 1, "%lu", i);
return *this << buffer;
}
TextStream& TextStream::operator<<(float f)
{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, precisionFormats[m_precision], f);
+ char buffer[printBufferSize];
+ snprintf(buffer, sizeof(buffer) - 1, "%.2f", f);
return *this << buffer;
}
TextStream& TextStream::operator<<(double d)
{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, precisionFormats[m_precision], d);
+ char buffer[printBufferSize];
+ snprintf(buffer, sizeof(buffer) - 1, "%.2f", d);
return *this << buffer;
}
-TextStream& TextStream::operator<<(const char* s)
+TextStream& TextStream::operator<<(const char* string)
{
- if (m_hasByteArray) {
- unsigned length = strlen(s);
- unsigned oldSize = m_byteArray.size();
- m_byteArray.grow(oldSize + length);
- memcpy(m_byteArray.data() + oldSize, s, length);
- }
- if (m_string)
- m_string->append(s);
+ size_t stringLength = strlen(string);
+ size_t textLength = m_text.size();
+ m_text.grow(textLength + stringLength);
+ for (size_t i = 0; i < stringLength; ++i)
+ m_text[textLength + i] = string[i];
return *this;
}
-TextStream& TextStream::operator<<(const DeprecatedString& s)
+TextStream& TextStream::operator<<(const String& string)
{
- if (m_hasByteArray) {
- unsigned length = s.length();
- unsigned oldSize = m_byteArray.size();
- m_byteArray.grow(oldSize + length);
- memcpy(m_byteArray.data() + oldSize, s.latin1(), length);
- }
- if (m_string)
- m_string->append(s);
+ append(m_text, string);
return *this;
}
-TextStream& TextStream::operator<<(const String& s)
+String TextStream::release()
{
- return (*this) << s.deprecatedString();
+ return String::adopt(m_text);
}
-TextStream& TextStream::operator<<(void* p)
+#if PLATFORM(WIN_OS) && PLATFORM(X86_64) && COMPILER(MSVC)
+TextStream& TextStream::operator<<(__int64 i)
{
- char buffer[integerOrPointerAsStringBufferSize];
- sprintf(buffer, "%p", p);
+ char buffer[printBufferSize];
+ snprintf(buffer, sizeof(buffer) - 1, "%I64i", i);
return *this << buffer;
}
-
-TextStream& TextStream::operator<<(const TextStreamManipulator& m)
-{
- return m(*this);
-}
-
-int TextStream::precision(int p)
-{
- int oldPrecision = m_precision;
-
- if (p >= 0 && p <= maxPrecision)
- m_precision = p;
-
- return oldPrecision;
-}
-
-TextStream &endl(TextStream& stream)
+TextStream& TextStream::operator<<(unsigned __int64 i)
{
- return stream << '\n';
+ char buffer[printBufferSize];
+ snprintf(buffer, sizeof(buffer) - 1, "%I64u", i);
+ return *this << buffer;
}
+#endif
}
diff --git a/WebCore/platform/text/TextStream.h b/WebCore/platform/text/TextStream.h
index 897c267..6fb3f4b 100644
--- a/WebCore/platform/text/TextStream.h
+++ b/WebCore/platform/text/TextStream.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2004, 2008 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,26 +27,14 @@
#define TextStream_h
#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
namespace WebCore {
-class DeprecatedChar;
-class DeprecatedString;
class String;
-class TextStream;
-
-typedef TextStream& (*TextStreamManipulator)(TextStream&);
-
-TextStream& endl(TextStream&);
class TextStream {
public:
- TextStream(DeprecatedString*);
-
- TextStream& operator<<(char);
- TextStream& operator<<(const DeprecatedChar&);
- TextStream& operator<<(short);
- TextStream& operator<<(unsigned short);
TextStream& operator<<(int);
TextStream& operator<<(unsigned);
TextStream& operator<<(long);
@@ -55,21 +43,15 @@ public:
TextStream& operator<<(double);
TextStream& operator<<(const char*);
TextStream& operator<<(const String&);
- TextStream& operator<<(const DeprecatedString&);
- TextStream& operator<<(void*);
-
- TextStream& operator<<(const TextStreamManipulator&);
+#if PLATFORM(WIN_OS) && PLATFORM(X86_64) && COMPILER(MSVC)
+ TextStream& operator<<(unsigned __int64);
+ TextStream& operator<<(__int64);
+#endif
- int precision(int);
+ String release();
private:
- TextStream(const TextStream&);
- TextStream& operator=(const TextStream&);
-
- bool m_hasByteArray;
- Vector<char> m_byteArray;
- DeprecatedString* m_string;
- int m_precision;
+ Vector<UChar> m_text;
};
}
diff --git a/WebCore/platform/text/cf/StringCF.cpp b/WebCore/platform/text/cf/StringCF.cpp
index 9e0d5f2..5e12ba9 100644
--- a/WebCore/platform/text/cf/StringCF.cpp
+++ b/WebCore/platform/text/cf/StringCF.cpp
@@ -21,7 +21,7 @@
#include "config.h"
#include "PlatformString.h"
-#if PLATFORM(CF)
+#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
#include <CoreFoundation/CoreFoundation.h>
@@ -52,4 +52,4 @@ CFStringRef String::createCFString() const
}
-#endif // PLATFORM(CF)
+#endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
diff --git a/WebCore/platform/text/cf/StringImplCF.cpp b/WebCore/platform/text/cf/StringImplCF.cpp
index 21b43df..ff595a5 100644
--- a/WebCore/platform/text/cf/StringImplCF.cpp
+++ b/WebCore/platform/text/cf/StringImplCF.cpp
@@ -21,7 +21,7 @@
#include "config.h"
#include "StringImpl.h"
-#if PLATFORM(CF)
+#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
#include <CoreFoundation/CoreFoundation.h>
@@ -34,4 +34,4 @@ CFStringRef StringImpl::createCFString()
}
-#endif // PLATFORM(CF)
+#endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
diff --git a/WebCore/platform/text/mac/ShapeArabic.c b/WebCore/platform/text/mac/ShapeArabic.c
index 4706e7c..6dbc008 100644
--- a/WebCore/platform/text/mac/ShapeArabic.c
+++ b/WebCore/platform/text/mac/ShapeArabic.c
@@ -2,8 +2,28 @@
******************************************************************************
*
* Copyright (C) 2000-2004, International Business Machines
-* Corporation and others. All Rights Reserved.
-* Copyright (C) 2007 Apple Inc. All rights reserved.
+* Corporation and others. All Rights Reserved.
+* Copyright (C) 2007 Apple Inc. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy of this
+* software and associated documentation files (the "Software"), to deal in the Software
+* without restriction, including without limitation the rights to use, copy, modify,
+* merge, publish, distribute, and/or sell copies of the Software, and to permit persons
+* to whom the Software is furnished to do so, provided that the above copyright notice(s)
+* and this permission notice appear in all copies of the Software and that both the above
+* copyright notice(s) and this permission notice appear in supporting documentation.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+* PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER
+* OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+* CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*
+* Except as contained in this notice, the name of a copyright holder shall not be used in
+* advertising or otherwise to promote the sale, use or other dealings in this Software
+* without prior written authorization of the copyright holder.
*
******************************************************************************
*
@@ -11,6 +31,9 @@
*/
#include "config.h"
+
+#if USE(ATSUI)
+
#include "ShapeArabic.h"
#include <unicode/utypes.h>
@@ -528,3 +551,5 @@ int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int3
return sourceLength;
}
+
+#endif // USE(ATSUI)
diff --git a/WebCore/platform/text/mac/ShapeArabic.h b/WebCore/platform/text/mac/ShapeArabic.h
index 2f85ea0..8aa577d 100644
--- a/WebCore/platform/text/mac/ShapeArabic.h
+++ b/WebCore/platform/text/mac/ShapeArabic.h
@@ -26,6 +26,8 @@
#ifndef ShapeArabic_h
#define ShapeArabic_h
+#if USE(ATSUI)
+
#include <unicode/ushape.h>
#ifdef __cplusplus
@@ -38,4 +40,5 @@ int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int3
}
#endif
+#endif // USE(ATSUI)
#endif // ShapeArabic_h
diff --git a/WebCore/platform/text/mac/TextCodecMac.cpp b/WebCore/platform/text/mac/TextCodecMac.cpp
index 7270a26..ac1f0fb 100644
--- a/WebCore/platform/text/mac/TextCodecMac.cpp
+++ b/WebCore/platform/text/mac/TextCodecMac.cpp
@@ -78,7 +78,6 @@ void TextCodecMac::registerCodecs(TextCodecRegistrar registrar)
TextCodecMac::TextCodecMac(TECTextEncodingID encoding)
: m_encoding(encoding)
- , m_error(false)
, m_numBufferedBytes(0)
, m_converterTEC(0)
{
@@ -179,16 +178,15 @@ OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferL
}
// Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus.
- if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) {
+ if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0)
status = kTECOutputBufferFullStatus;
- }
inputLength = bytesRead;
outputLength = bytesWritten;
return status;
}
-String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
+String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
{
// Get a converter for the passed-in encoding.
if (!m_converterTEC && createTECConverter() != noErr)
@@ -201,7 +199,7 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
bool bufferWasFull = false;
UniChar buffer[ConversionBufferSize];
- while (sourceLength || bufferWasFull) {
+ while ((sourceLength || bufferWasFull) && !sawError) {
int bytesRead = 0;
int bytesWritten = 0;
OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten);
@@ -217,6 +215,10 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
case kTextUndefinedElementErr:
// FIXME: Put FFFD character into the output string in this case?
TECClearConverterContextInfo(m_converterTEC);
+ if (stopOnError) {
+ sawError = true;
+ break;
+ }
if (sourceLength) {
sourcePointer += 1;
sourceLength -= 1;
@@ -236,13 +238,12 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
break;
}
default:
- LOG_ERROR("text decoding failed with error %ld", static_cast<long>(status));
- m_error = true;
+ sawError = true;
return String();
}
ASSERT(!(bytesWritten % sizeof(UChar)));
- appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar));
+ result.append(buffer, bytesWritten / sizeof(UChar));
bufferWasFull = status == kTECOutputBufferFullStatus;
}
@@ -251,7 +252,7 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
unsigned long bytesWritten = 0;
TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten);
ASSERT(!(bytesWritten % sizeof(UChar)));
- appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar));
+ result.append(buffer, bytesWritten / sizeof(UChar));
}
String resultString = String::adopt(result);
@@ -266,7 +267,7 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
return resultString;
}
-CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowEntities)
+CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
// FIXME: We should really use TEC here instead of CFString for consistency with the other direction.
@@ -280,7 +281,7 @@ CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowE
CFIndex charactersLeft = CFStringGetLength(cfs);
Vector<char> result;
size_t size = 0;
- UInt8 lossByte = allowEntities ? 0 : '?';
+ UInt8 lossByte = handling == QuestionMarksForUnencodables ? '?' : 0;
while (charactersLeft > 0) {
CFRange range = CFRangeMake(startPos, charactersLeft);
CFIndex bufferLength;
@@ -303,11 +304,10 @@ CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowE
++charactersConverted;
}
}
- char entityBuffer[16];
- sprintf(entityBuffer, "&#%u;", badChar);
- size_t entityLength = strlen(entityBuffer);
+ UnencodableReplacementArray entity;
+ int entityLength = getUnencodableReplacement(badChar, handling, entity);
result.grow(size + entityLength);
- memcpy(result.data() + size, entityBuffer, entityLength);
+ memcpy(result.data() + size, entity, entityLength);
size += entityLength;
}
diff --git a/WebCore/platform/text/mac/TextCodecMac.h b/WebCore/platform/text/mac/TextCodecMac.h
index 639e214..aee4a97 100644
--- a/WebCore/platform/text/mac/TextCodecMac.h
+++ b/WebCore/platform/text/mac/TextCodecMac.h
@@ -43,8 +43,8 @@ namespace WebCore {
explicit TextCodecMac(TECTextEncodingID);
virtual ~TextCodecMac();
- virtual String decode(const char*, size_t length, bool flush = false);
- virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling);
private:
OSStatus decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
@@ -55,7 +55,6 @@ namespace WebCore {
TECTextEncodingID m_encoding;
UChar m_backslashAsCurrencySymbol;
- bool m_error;
unsigned m_numBufferedBytes;
unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
mutable TECObjectRef m_converterTEC;
diff --git a/WebCore/platform/text/mac/mac-encodings.txt b/WebCore/platform/text/mac/mac-encodings.txt
index 270c625..bb45e22 100644
--- a/WebCore/platform/text/mac/mac-encodings.txt
+++ b/WebCore/platform/text/mac/mac-encodings.txt
@@ -22,15 +22,12 @@ JIS_X0208_90: JIS_X0208-1990
JIS_X0212_90: JIS_X0212-1990
KOI8_U: KOI8-U
MacArabic: x-mac-arabic
-MacCentralEurRoman: x-mac-centraleurroman, xmacce
MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese
MacChineseTrad: x-mac-chinesetrad, xmactradchinese
MacCroatian: x-mac-croatian
-MacCyrillic: x-mac-cyrillic, maccyrillic, xmacukrainian
MacDevanagari: x-mac-devanagari
MacDingbats: x-mac-dingbats
MacFarsi: x-mac-farsi
-MacGreek: x-mac-greek
MacGujarati: x-mac-gujarati
MacGurmukhi: x-mac-gurmukhi
MacHebrew: x-mac-hebrew
@@ -42,7 +39,6 @@ MacRomanian: x-mac-romanian
MacSymbol: x-mac-symbol
MacThai: x-mac-thai
MacTibetan: x-mac-tibetan
-MacTurkish: x-mac-turkish
MacVT100: x-mac-vt100
NextStepLatin: x-nextstep
ShiftJIS_X0213_00: Shift_JIS_X0213-2000
diff --git a/WebCore/platform/text/qt/StringQt.cpp b/WebCore/platform/text/qt/StringQt.cpp
index 23a684b..de9f527 100644
--- a/WebCore/platform/text/qt/StringQt.cpp
+++ b/WebCore/platform/text/qt/StringQt.cpp
@@ -26,7 +26,6 @@
#include "config.h"
#include "PlatformString.h"
-#include "DeprecatedString.h"
#include <QString>
@@ -44,21 +43,14 @@ String::String(const QStringRef& ref)
{
if (!ref.string())
return;
- m_impl = StringImpl::create(reinterpret_cast<const UChar *>(ref.unicode()), ref.length());
+ m_impl = StringImpl::create(reinterpret_cast<const UChar*>(ref.unicode()), ref.length());
}
-
String::operator QString() const
{
return QString(reinterpret_cast<const QChar*>(characters()), length());
}
-// DeprecatedString conversions
-DeprecatedString::operator QString() const
-{
- return QString(reinterpret_cast<const QChar*>(unicode()), length());
-}
-
}
// vim: ts=4 sw=4 et
diff --git a/WebCore/platform/text/qt/TextCodecQt.cpp b/WebCore/platform/text/qt/TextCodecQt.cpp
index 888c6af..0f385dd 100644
--- a/WebCore/platform/text/qt/TextCodecQt.cpp
+++ b/WebCore/platform/text/qt/TextCodecQt.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2008 Holger Hans Peter Freyther
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -91,9 +92,10 @@ TextCodecQt::~TextCodecQt()
}
-String TextCodecQt::decode(const char* bytes, size_t length, bool flush)
+String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError)
{
QString unicode = m_codec->toUnicode(bytes, length, &m_state);
+ sawError = m_state.invalidChars != 0;
if (flush) {
m_state.flags = QTextCodec::DefaultConversion;
@@ -104,12 +106,12 @@ String TextCodecQt::decode(const char* bytes, size_t length, bool flush)
return unicode;
}
-CString TextCodecQt::encode(const UChar* characters, size_t length, bool allowEntities)
+CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling)
{
if (!length)
return "";
- // FIXME: do something sensible with allowEntities
+ // FIXME: do something sensible with UnencodableHandling
QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, 0);
return CString(ba.constData(), ba.length());
diff --git a/WebCore/platform/text/qt/TextCodecQt.h b/WebCore/platform/text/qt/TextCodecQt.h
index 9bbb80b..f28f0bb 100644
--- a/WebCore/platform/text/qt/TextCodecQt.h
+++ b/WebCore/platform/text/qt/TextCodecQt.h
@@ -30,8 +30,6 @@
#include "TextEncoding.h"
#include <QTextCodec>
-class QTextCodec;
-
namespace WebCore {
class TextCodecQt : public TextCodec {
@@ -42,8 +40,8 @@ namespace WebCore {
TextCodecQt(const TextEncoding&);
virtual ~TextCodecQt();
- virtual String decode(const char*, size_t length, bool flush = false);
- virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+ virtual CString encode(const UChar*, size_t length, UnencodableHandling);
private:
TextEncoding m_encoding;
diff --git a/WebCore/platform/text/wx/StringWx.cpp b/WebCore/platform/text/wx/StringWx.cpp
index 7f91dbf..50919c4 100644
--- a/WebCore/platform/text/wx/StringWx.cpp
+++ b/WebCore/platform/text/wx/StringWx.cpp
@@ -27,7 +27,6 @@
#include "PlatformString.h"
#include "CString.h"
-#include "DeprecatedString.h"
#include "unicode/ustring.h"
#include <wx/defs.h>
@@ -88,12 +87,6 @@ String::operator wxString() const
return wxString(utf8().data(), wxConvUTF8);
}
-// DeprecatedString conversions
-DeprecatedString::operator wxString() const
-{
- return wxString(utf8().data(), wxConvUTF8);
-}
-
}
// vim: ts=4 sw=4 et