diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2008-12-17 18:05:15 -0800 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2008-12-17 18:05:15 -0800 |
commit | 1cbdecfa9fc428ac2d8aca0fa91c9580b3d57353 (patch) | |
tree | 4457a7306ea5acb43fe05bfe0973b1f7faf97ba2 /WebCore/platform/text | |
parent | 9364f22aed35e1a1e9d07c121510f80be3ab0502 (diff) | |
download | external_webkit-1cbdecfa9fc428ac2d8aca0fa91c9580b3d57353.zip external_webkit-1cbdecfa9fc428ac2d8aca0fa91c9580b3d57353.tar.gz external_webkit-1cbdecfa9fc428ac2d8aca0fa91c9580b3d57353.tar.bz2 |
Code drop from //branches/cupcake/...@124589
Diffstat (limited to 'WebCore/platform/text')
48 files changed, 1868 insertions, 1079 deletions
diff --git a/WebCore/platform/text/AtomicString.cpp b/WebCore/platform/text/AtomicString.cpp index c584e6c..dc573e1 100644 --- a/WebCore/platform/text/AtomicString.cpp +++ b/WebCore/platform/text/AtomicString.cpp @@ -20,27 +20,46 @@ #include "config.h" -#ifdef AVOID_STATIC_CONSTRUCTORS +#ifdef SKIP_STATIC_CONSTRUCTORS_ON_GCC #define ATOMICSTRING_HIDE_GLOBALS 1 #endif #include "AtomicString.h" -#include "DeprecatedString.h" #include "StaticConstructors.h" #include "StringHash.h" #include <kjs/identifier.h> +#include <wtf/Threading.h> #include <wtf/HashSet.h> -using KJS::Identifier; -using KJS::UString; +#if ENABLE(WORKERS) +#include <wtf/ThreadSpecific.h> +using namespace WTF; +#endif + +#if USE(JSC) +using JSC::Identifier; +using JSC::UString; +#endif namespace WebCore { -static HashSet<StringImpl*>* stringTable; +#if ENABLE(WORKERS) +static ThreadSpecific<HashSet<StringImpl*> >* staticStringTable; +#else +static HashSet<StringImpl*>* staticStringTable; +#endif -struct CStringTranslator +static inline HashSet<StringImpl*>* stringTable() { +#if ENABLE(WORKERS) + return *staticStringTable; +#else + return staticStringTable; +#endif +} + +struct CStringTranslator { static unsigned hash(const char* c) { return StringImpl::computeHash(c); @@ -74,13 +93,16 @@ bool operator==(const AtomicString& a, const char* b) return CStringTranslator::equal(impl, b); } -StringImpl* AtomicString::add(const char* c) +PassRefPtr<StringImpl> AtomicString::add(const char* c) { if (!c) return 0; if (!*c) return StringImpl::empty(); - return *stringTable->add<const char*, CStringTranslator>(c).first; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<const char*, CStringTranslator>(c); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); } struct UCharBuffer { @@ -88,6 +110,37 @@ struct UCharBuffer { unsigned length; }; +static inline bool equal(StringImpl* string, const UChar* characters, unsigned length) +{ + if (string->length() 
!= length) + return false; + +#if PLATFORM(ARM) + const UChar* stringCharacters = string->characters(); + for (unsigned i = 0; i != length; ++i) { + if (*stringCharacters++ != *characters++) + return false; + } + return true; +#else + /* Do it 4-bytes-at-a-time on architectures where it's safe */ + + const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters()); + const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters); + + unsigned halfLength = length >> 1; + for (unsigned i = 0; i != halfLength; ++i) { + if (*stringCharacters++ != *bufferCharacters++) + return false; + } + + if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters)) + return false; + + return true; +#endif +} + struct UCharBufferTranslator { static unsigned hash(const UCharBuffer& buf) { @@ -96,37 +149,7 @@ struct UCharBufferTranslator { static bool equal(StringImpl* const& str, const UCharBuffer& buf) { - unsigned strLength = str->length(); - unsigned bufLength = buf.length; - if (strLength != bufLength) - return false; - -#if PLATFORM(ARM) - const UChar* strChars = str->characters(); - const UChar* bufChars = buf.s; - - for (unsigned i = 0; i != strLength; ++i) { - if (*strChars++ != *bufChars++) - return false; - } - return true; -#else - /* Do it 4-bytes-at-a-time on architectures where it's safe */ - const uint32_t* strChars = reinterpret_cast<const uint32_t*>(str->characters()); - const uint32_t* bufChars = reinterpret_cast<const uint32_t*>(buf.s); - - unsigned halfLength = strLength >> 1; - for (unsigned i = 0; i != halfLength; ++i) { - if (*strChars++ != *bufChars++) - return false; - } - - if (strLength & 1 && - *reinterpret_cast<const uint16_t *>(strChars) != *reinterpret_cast<const uint16_t *>(bufChars)) - return false; - - return true; -#endif + return WebCore::equal(str, buf.s, buf.length); } static void translate(StringImpl*& location, const UCharBuffer& buf, 
unsigned hash) @@ -135,7 +158,31 @@ struct UCharBufferTranslator { } }; -StringImpl* AtomicString::add(const UChar* s, int length) +struct HashAndCharacters { + unsigned hash; + const UChar* characters; + unsigned length; +}; + +struct HashAndCharactersTranslator { + static unsigned hash(const HashAndCharacters& buffer) + { + ASSERT(buffer.hash == StringImpl::computeHash(buffer.characters, buffer.length)); + return buffer.hash; + } + + static bool equal(StringImpl* const& string, const HashAndCharacters& buffer) + { + return WebCore::equal(string, buffer.characters, buffer.length); + } + + static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash) + { + location = new StringImpl(buffer.characters, buffer.length, hash); + } +}; + +PassRefPtr<StringImpl> AtomicString::add(const UChar* s, int length) { if (!s) return 0; @@ -143,11 +190,14 @@ StringImpl* AtomicString::add(const UChar* s, int length) if (length == 0) return StringImpl::empty(); - UCharBuffer buf = {s, length}; - return *stringTable->add<UCharBuffer, UCharBufferTranslator>(buf).first; + UCharBuffer buf = { s, length }; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<UCharBuffer, UCharBufferTranslator>(buf); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); } -StringImpl* AtomicString::add(const UChar* s) +PassRefPtr<StringImpl> AtomicString::add(const UChar* s) { if (!s) return 0; @@ -160,10 +210,13 @@ StringImpl* AtomicString::add(const UChar* s) return StringImpl::empty(); UCharBuffer buf = {s, length}; - return *stringTable->add<UCharBuffer, UCharBufferTranslator>(buf).first; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<UCharBuffer, UCharBufferTranslator>(buf); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); } -StringImpl* AtomicString::add(StringImpl* r) +PassRefPtr<StringImpl> AtomicString::add(StringImpl* r) { if (!r || 
r->m_inTable) return r; @@ -171,7 +224,7 @@ StringImpl* AtomicString::add(StringImpl* r) if (r->length() == 0) return StringImpl::empty(); - StringImpl* result = *stringTable->add(r).first; + StringImpl* result = *stringTable()->add(r).first; if (result == r) r->m_inTable = true; return result; @@ -179,37 +232,65 @@ StringImpl* AtomicString::add(StringImpl* r) void AtomicString::remove(StringImpl* r) { - stringTable->remove(r); + stringTable()->remove(r); } -StringImpl* AtomicString::add(const KJS::Identifier& str) +#if USE(JSC) +PassRefPtr<StringImpl> AtomicString::add(const JSC::Identifier& identifier) { - return add(reinterpret_cast<const UChar*>(str.data()), str.size()); -} + if (identifier.isNull()) + return 0; -StringImpl* AtomicString::add(const KJS::UString& str) -{ - return add(reinterpret_cast<const UChar*>(str.data()), str.size()); -} + UString::Rep* string = identifier.ustring().rep(); + unsigned length = string->size(); + if (!length) + return StringImpl::empty(); -AtomicString::operator Identifier() const -{ - return m_string; + HashAndCharacters buffer = { string->computedHash(), string->data(), length }; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<HashAndCharacters, HashAndCharactersTranslator>(buffer); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); } -AtomicString::operator UString() const +PassRefPtr<StringImpl> AtomicString::add(const JSC::UString& ustring) { - return m_string; + if (ustring.isNull()) + return 0; + + UString::Rep* string = ustring.rep(); + unsigned length = string->size(); + if (!length) + return StringImpl::empty(); + + HashAndCharacters buffer = { string->hash(), string->data(), length }; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable()->add<HashAndCharacters, HashAndCharactersTranslator>(buffer); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); } +#endif -AtomicString::AtomicString(const 
DeprecatedString& s) - : m_string(add(reinterpret_cast<const UChar*>(s.unicode()), s.length())) +AtomicStringImpl* AtomicString::find(const JSC::Identifier& identifier) { + if (identifier.isNull()) + return 0; + + UString::Rep* string = identifier.ustring().rep(); + unsigned length = string->size(); + if (!length) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + HashAndCharacters buffer = { string->computedHash(), string->data(), length }; + HashSet<StringImpl*>::iterator iterator = stringTable()->find<HashAndCharacters, HashAndCharactersTranslator>(buffer); + if (iterator == stringTable()->end()) + return 0; + return static_cast<AtomicStringImpl*>(*iterator); } -DeprecatedString AtomicString::deprecatedString() const +AtomicString::operator UString() const { - return m_string.deprecatedString(); + return m_string; } DEFINE_GLOBAL(AtomicString, nullAtom) @@ -222,7 +303,14 @@ void AtomicString::init() { static bool initialized; if (!initialized) { - stringTable = new HashSet<StringImpl*>; + // Initialization is not thread safe, so this function must be called from the main thread first. + ASSERT(isMainThread()); + +#if ENABLE(WORKERS) + staticStringTable = new ThreadSpecific<HashSet<StringImpl*> >; +#else + staticStringTable = new HashSet<StringImpl*>; +#endif // Use placement new to initialize the globals. new ((void*)&nullAtom) AtomicString; diff --git a/WebCore/platform/text/AtomicString.h b/WebCore/platform/text/AtomicString.h index 4a0bb5b..ad034d9 100644 --- a/WebCore/platform/text/AtomicString.h +++ b/WebCore/platform/text/AtomicString.h @@ -1,7 +1,5 @@ /* - * This file is part of the DOM implementation for KDE. - * - * Copyright (C) 2004, 2005, 2006 Apple Computer, Inc. + * Copyright (C) 2004, 2005, 2006, 2008 Apple Inc. All rights reserved. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -28,6 +26,8 @@ namespace WebCore { +struct AtomicStringHash; + class AtomicString { public: static void init(); @@ -36,17 +36,28 @@ public: AtomicString(const char* s) : m_string(add(s)) { } AtomicString(const UChar* s, int length) : m_string(add(s, length)) { } AtomicString(const UChar* s) : m_string(add(s)) { } - AtomicString(const KJS::UString& s) : m_string(add(s)) { } - AtomicString(const KJS::Identifier& s) : m_string(add(s)) { } +#if USE(JSC) + AtomicString(const JSC::UString& s) : m_string(add(s)) { } + AtomicString(const JSC::Identifier& s) : m_string(add(s)) { } +#endif AtomicString(StringImpl* imp) : m_string(add(imp)) { } AtomicString(AtomicStringImpl* imp) : m_string(imp) { } AtomicString(const String& s) : m_string(add(s.impl())) { } + // Hash table deleted values, which are only constructed and never copied or destroyed. + AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { } + bool isHashTableDeletedValue() const { return m_string.isHashTableDeletedValue(); } + +#if USE(JSC) + static AtomicStringImpl* find(const JSC::Identifier&); +#endif + operator const String&() const { return m_string; } - const String& domString() const { return m_string; }; + const String& string() const { return m_string; }; - operator KJS::Identifier() const; - operator KJS::UString() const; +#if USE(JSC) + operator JSC::UString() const; +#endif AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl *>(m_string.impl()); } @@ -57,23 +68,21 @@ public: bool contains(UChar c) const { return m_string.contains(c); } bool contains(const AtomicString& s, bool caseSensitive = true) const - { return m_string.contains(s.domString(), caseSensitive); } + { return m_string.contains(s.string(), caseSensitive); } int find(UChar c, int start = 0) const { return m_string.find(c, start); } int find(const AtomicString& 
s, int start = 0, bool caseSentitive = true) const - { return m_string.find(s.domString(), start, caseSentitive); } + { return m_string.find(s.string(), start, caseSentitive); } bool startsWith(const AtomicString& s, bool caseSensitive = true) const - { return m_string.startsWith(s.domString(), caseSensitive); } + { return m_string.startsWith(s.string(), caseSensitive); } bool endsWith(const AtomicString& s, bool caseSensitive = true) const - { return m_string.endsWith(s.domString(), caseSensitive); } + { return m_string.endsWith(s.string(), caseSensitive); } int toInt(bool* ok = 0) const { return m_string.toInt(ok); } double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); } float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); } bool percentage(int& p) const { return m_string.percentage(p); } - Length* toLengthArray(int& len) const { return m_string.toLengthArray(len); } - Length* toCoordsArray(int& len) const { return m_string.toCoordsArray(len); } bool isNull() const { return m_string.isNull(); } bool isEmpty() const { return m_string.isEmpty(); } @@ -93,18 +102,17 @@ public: operator QString() const { return m_string; } #endif - AtomicString(const DeprecatedString&); - DeprecatedString deprecatedString() const; - private: String m_string; - static StringImpl* add(const char*); - static StringImpl* add(const UChar*, int length); - static StringImpl* add(const UChar*); - static StringImpl* add(StringImpl*); - static StringImpl* add(const KJS::UString&); - static StringImpl* add(const KJS::Identifier&); + static PassRefPtr<StringImpl> add(const char*); + static PassRefPtr<StringImpl> add(const UChar*, int length); + static PassRefPtr<StringImpl> add(const UChar*); + static PassRefPtr<StringImpl> add(StringImpl*); +#if USE(JSC) + static PassRefPtr<StringImpl> add(const JSC::UString&); + static PassRefPtr<StringImpl> add(const JSC::Identifier&); +#endif }; inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() 
== b.impl(); } @@ -126,6 +134,7 @@ inline bool equalIgnoringCase(const char* a, const AtomicString& b) { return equ inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); } // Define external global variables for the commonly used atomic strings. +// These are only usable from the main thread. #ifndef ATOMICSTRING_HIDE_GLOBALS extern const AtomicString nullAtom; extern const AtomicString emptyAtom; @@ -136,4 +145,15 @@ inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return e } // namespace WebCore + +namespace WTF { + + // AtomicStringHash is the default hash for AtomicString + template<typename T> struct DefaultHash; + template<> struct DefaultHash<WebCore::AtomicString> { + typedef WebCore::AtomicStringHash Hash; + }; + +} // namespace WTF + #endif // AtomicString_h diff --git a/WebCore/platform/text/AtomicStringHash.h b/WebCore/platform/text/AtomicStringHash.h new file mode 100644 index 0000000..67a45de --- /dev/null +++ b/WebCore/platform/text/AtomicStringHash.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AtomicStringHash_h +#define AtomicStringHash_h + +#include "AtomicString.h" +#include <wtf/HashTraits.h> + +namespace WebCore { + + struct AtomicStringHash { + static unsigned hash(const AtomicString& key) + { + return key.impl()->existingHash(); + } + + static bool equal(const AtomicString& a, const AtomicString& b) + { + return a == b; + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + +} + +namespace WTF { + + // WebCore::AtomicStringHash is the default hash for AtomicString + template<> struct HashTraits<WebCore::AtomicString> : GenericHashTraits<WebCore::AtomicString> { + static const bool emptyValueIsZero = true; + static void constructDeletedValue(WebCore::AtomicString& slot) { new (&slot) WebCore::AtomicString(HashTableDeletedValue); } + static bool isDeletedValue(const WebCore::AtomicString& slot) { return slot.isHashTableDeletedValue(); } + }; + +} + +#endif diff --git a/WebCore/platform/text/BidiResolver.h b/WebCore/platform/text/BidiResolver.h index d2515a9..ffd3d51 100644 --- a/WebCore/platform/text/BidiResolver.h +++ b/WebCore/platform/text/BidiResolver.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2000 Lars Knoll (knoll@kde.org) - * Copyright (C) 2003, 2004, 
2006, 2007 Apple Inc. All right reserved. + * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -23,7 +23,9 @@ #define BidiResolver_h #include "BidiContext.h" +#include <wtf/Noncopyable.h> #include <wtf/PassRefPtr.h> +#include <wtf/Vector.h> namespace WebCore { @@ -85,6 +87,8 @@ struct BidiCharacterRun { } } + void destroy() { delete this; } + int start() const { return m_start; } int stop() const { return m_stop; } unsigned char level() const { return m_level; } @@ -100,19 +104,24 @@ struct BidiCharacterRun { BidiCharacterRun* m_next; }; -template <class Iterator, class Run> class BidiResolver { +template <class Iterator, class Run> class BidiResolver : public Noncopyable { public : BidiResolver() : m_direction(WTF::Unicode::OtherNeutral) - , m_adjustEmbedding(false) , reachedEndOfLine(false) , emptyRun(true) , m_firstRun(0) , m_lastRun(0) + , m_logicallyLastRun(0) , m_runCount(0) { } + const Iterator& position() const { return current; } + void setPosition(const Iterator& position) { current = position; } + + void increment() { current.increment(); } + BidiContext* context() const { return m_status.context.get(); } void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; } @@ -126,22 +135,27 @@ public : const BidiStatus& status() const { return m_status; } void setStatus(const BidiStatus s) { m_status = s; } - bool adjustEmbedding() const { return m_adjustEmbedding; } - void setAdjustEmbedding(bool adjsutEmbedding) { m_adjustEmbedding = adjsutEmbedding; } - void embed(WTF::Unicode::Direction); - void createBidiRunsForLine(const Iterator& start, const Iterator& end, bool visualOrder = false, bool hardLineBreak = false); + void commitExplicitEmbedding(); + + void createBidiRunsForLine(const Iterator& end, bool visualOrder = false, bool hardLineBreak = false); Run* firstRun() const { return m_firstRun; } 
Run* lastRun() const { return m_lastRun; } - int runCount() const { return m_runCount; } + Run* logicallyLastRun() const { return m_logicallyLastRun; } + unsigned runCount() const { return m_runCount; } void addRun(Run*); + void prependRun(Run*); + + void moveRunToEnd(Run*); + void moveRunToBeginning(Run*); + void deleteRuns(); protected: void appendRun(); - void reverseRuns(int start, int end); + void reverseRuns(unsigned start, unsigned end); Iterator current; Iterator sor; @@ -149,7 +163,6 @@ protected: Iterator last; BidiStatus m_status; WTF::Unicode::Direction m_direction; - bool m_adjustEmbedding; Iterator endOfLine; bool reachedEndOfLine; Iterator lastBeforeET; @@ -157,25 +170,96 @@ protected: Run* m_firstRun; Run* m_lastRun; - int m_runCount; + Run* m_logicallyLastRun; + unsigned m_runCount; + +private: + void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to); + void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from); + + Vector<WTF::Unicode::Direction, 8> m_currentExplicitEmbeddingSequence; }; template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::appendRun() +inline void BidiResolver<Iterator, Run>::addRun(Run* run) { - if (emptyRun || eor.atEnd()) - return; - - Run* bidiRun = new Run(sor.offset(), eor.offset() + 1, context(), m_direction); if (!m_firstRun) - m_firstRun = bidiRun; + m_firstRun = run; + else + m_lastRun->m_next = run; + m_lastRun = run; + m_runCount++; +} + +template <class Iterator, class Run> +inline void BidiResolver<Iterator, Run>::prependRun(Run* run) +{ + ASSERT(!run->m_next); + + if (!m_lastRun) + m_lastRun = run; else - m_lastRun->m_next = bidiRun; - m_lastRun = bidiRun; + run->m_next = m_firstRun; + m_firstRun = run; m_runCount++; +} + +template <class Iterator, class Run> +inline void BidiResolver<Iterator, Run>::moveRunToEnd(Run* run) +{ + ASSERT(m_firstRun); + ASSERT(m_lastRun); + ASSERT(run->m_next); + + Run* current = 0; + Run* next = m_firstRun; + while (next != 
run) { + current = next; + next = current->next(); + } + + if (!current) + m_firstRun = run->next(); + else + current->m_next = run->m_next; + + run->m_next = 0; + m_lastRun->m_next = run; + m_lastRun = run; +} + +template <class Iterator, class Run> +inline void BidiResolver<Iterator, Run>::moveRunToBeginning(Run* run) +{ + ASSERT(m_firstRun); + ASSERT(m_lastRun); + ASSERT(run != m_firstRun); + + Run* current = m_firstRun; + Run* next = current->next(); + while (next != run) { + current = next; + next = current->next(); + } + + current->m_next = run->m_next; + if (run == m_lastRun) + m_lastRun = current; + + run->m_next = m_firstRun; + m_firstRun = run; +} + +template <class Iterator, class Run> +void BidiResolver<Iterator, Run>::appendRun() +{ + if (!emptyRun && !eor.atEnd()) { + addRun(new Run(sor.offset(), eor.offset() + 1, context(), m_direction)); + + eor.increment(); + sor = eor; + } - eor.increment(*this); - sor = eor; m_direction = WTF::Unicode::OtherNeutral; m_status.eor = WTF::Unicode::OtherNeutral; } @@ -185,122 +269,144 @@ void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction d) { using namespace WTF::Unicode; - bool b = m_adjustEmbedding; - m_adjustEmbedding = false; - if (d == PopDirectionalFormat) { - BidiContext* c = context()->parent(); - if (c) { - if (!emptyRun && eor != last) { - ASSERT(m_status.eor != OtherNeutral || eor.atEnd()); - // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last - ASSERT(m_status.last == EuropeanNumberSeparator - || m_status.last == EuropeanNumberTerminator - || m_status.last == CommonNumberSeparator - || m_status.last == BoundaryNeutral - || m_status.last == BlockSeparator - || m_status.last == SegmentSeparator - || m_status.last == WhiteSpaceNeutral - || m_status.last == OtherNeutral); - if (m_direction == OtherNeutral) - m_direction = m_status.lastStrong == LeftToRight ? 
LeftToRight : RightToLeft; - if (context()->dir() == LeftToRight) { - // bidi.sor ... bidi.eor ... bidi.last L - if (m_status.eor == EuropeanNumber) { - if (m_status.lastStrong != LeftToRight) { - m_direction = EuropeanNumber; - appendRun(); - } - } else if (m_status.eor == ArabicNumber) { - m_direction = ArabicNumber; - appendRun(); - } else if (m_status.lastStrong != LeftToRight) { - if (context()->dir() == RightToLeft) - m_direction = RightToLeft; - else { - appendRun(); - m_direction = LeftToRight; - } - } - } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) { + ASSERT(d == PopDirectionalFormat || d == LeftToRightEmbedding || d == LeftToRightOverride || d == RightToLeftEmbedding || d == RightToLeftOverride); + m_currentExplicitEmbeddingSequence.append(d); +} + +template <class Iterator, class Run> +void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from) +{ + using namespace WTF::Unicode; + + if (!emptyRun && eor != last) { + ASSERT(m_status.eor != OtherNeutral || eor.atEnd()); + // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last + ASSERT(m_status.last == EuropeanNumberSeparator + || m_status.last == EuropeanNumberTerminator + || m_status.last == CommonNumberSeparator + || m_status.last == BoundaryNeutral + || m_status.last == BlockSeparator + || m_status.last == SegmentSeparator + || m_status.last == WhiteSpaceNeutral + || m_status.last == OtherNeutral); + if (m_direction == OtherNeutral) + m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft; + if (from == LeftToRight) { + // bidi.sor ... bidi.eor ... 
bidi.last L + if (m_status.eor == EuropeanNumber) { + if (m_status.lastStrong != LeftToRight) { + m_direction = EuropeanNumber; appendRun(); - m_direction = RightToLeft; } - eor = last; + } else if (m_status.eor == ArabicNumber) { + m_direction = ArabicNumber; + appendRun(); + } else if (m_status.lastStrong != LeftToRight) { + appendRun(); + m_direction = LeftToRight; } + } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) { appendRun(); - emptyRun = true; - // sor for the new run is determined by the higher level (rule X10) - setLastDir(context()->dir()); - setLastStrongDir(context()->dir()); - setContext(c); - eor = Iterator(); - } - } else { - Direction runDir; - if (d == RightToLeftEmbedding || d == RightToLeftOverride) - runDir = RightToLeft; - else - runDir = LeftToRight; - bool override = d == LeftToRightOverride || d == RightToLeftOverride; - - unsigned char level = context()->level(); - if (runDir == RightToLeft) { - if (level % 2) // we have an odd level - level += 2; - else - level++; - } else { - if (level % 2) // we have an odd level - level++; - else - level += 2; + m_direction = RightToLeft; } + eor = last; + } + appendRun(); + emptyRun = true; + // sor for the new run is determined by the higher level (rule X10) + setLastDir(from); + setLastStrongDir(from); + eor = Iterator(); +} - if (level < 61) { - if (!emptyRun && eor != last) { - ASSERT(m_status.eor != OtherNeutral || eor.atEnd()); - // bidi.sor ... bidi.eor ... 
bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last - ASSERT(m_status.last == EuropeanNumberSeparator - || m_status.last == EuropeanNumberTerminator - || m_status.last == CommonNumberSeparator - || m_status.last == BoundaryNeutral - || m_status.last == BlockSeparator - || m_status.last == SegmentSeparator - || m_status.last == WhiteSpaceNeutral - || m_status.last == OtherNeutral); - if (m_direction == OtherNeutral) - m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft; - if (runDir == LeftToRight) { - // bidi.sor ... bidi.eor ... bidi.last L - if (m_status.eor == EuropeanNumber) { - if (m_status.lastStrong != LeftToRight) { - m_direction = EuropeanNumber; - appendRun(); - } - } else if (m_status.eor == ArabicNumber) { - m_direction = ArabicNumber; - appendRun(); - } else if (m_status.lastStrong != LeftToRight && context()->dir() == LeftToRight) { - appendRun(); - m_direction = LeftToRight; - } - } else if (m_status.eor == ArabicNumber - || m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || context()->dir() == RightToLeft) - || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft) { +template <class Iterator, class Run> +void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to) +{ + using namespace WTF::Unicode; + + if (!emptyRun && eor != last) { + ASSERT(m_status.eor != OtherNeutral || eor.atEnd()); + // bidi.sor ... bidi.eor ... 
bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last + ASSERT(m_status.last == EuropeanNumberSeparator + || m_status.last == EuropeanNumberTerminator + || m_status.last == CommonNumberSeparator + || m_status.last == BoundaryNeutral + || m_status.last == BlockSeparator + || m_status.last == SegmentSeparator + || m_status.last == WhiteSpaceNeutral + || m_status.last == OtherNeutral); + if (m_direction == OtherNeutral) + m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft; + if (to == LeftToRight) { + // bidi.sor ... bidi.eor ... bidi.last L + if (m_status.eor == EuropeanNumber) { + if (m_status.lastStrong != LeftToRight) { + m_direction = EuropeanNumber; appendRun(); - m_direction = RightToLeft; } - eor = last; + } else if (m_status.eor == ArabicNumber) { + m_direction = ArabicNumber; + appendRun(); + } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) { + appendRun(); + m_direction = LeftToRight; } + } else if (m_status.eor == ArabicNumber + || m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft) + || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft) { appendRun(); - emptyRun = true; - setContext(new BidiContext(level, runDir, override, context())); - setLastDir(runDir); - setLastStrongDir(runDir); - eor = Iterator(); + m_direction = RightToLeft; + } + eor = last; + } + appendRun(); + emptyRun = true; + setLastDir(to); + setLastStrongDir(to); + eor = Iterator(); +} + +template <class Iterator, class Run> +void BidiResolver<Iterator, Run>::commitExplicitEmbedding() +{ + using namespace WTF::Unicode; + + unsigned char fromLevel = context()->level(); + RefPtr<BidiContext> toContext = context(); + + for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) { + Direction embedding = m_currentExplicitEmbeddingSequence[i]; + if (embedding == PopDirectionalFormat) { + if (BidiContext* parentContext = 
toContext->parent()) + toContext = parentContext; + } else { + Direction direction = (embedding == RightToLeftEmbedding || embedding == RightToLeftOverride) ? RightToLeft : LeftToRight; + bool override = embedding == LeftToRightOverride || embedding == RightToLeftOverride; + unsigned char level = toContext->level(); + if (direction == RightToLeft) { + // Go to the least greater odd integer + level += 1; + level |= 1; + } else { + // Go to the least greater even integer + level += 2; + level &= ~1; + } + if (level < 61) + toContext = new BidiContext(level, direction, override, toContext.get()); } } - m_adjustEmbedding = b; + + unsigned char toLevel = toContext->level(); + + if (toLevel > fromLevel) + raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight); + else if (toLevel < fromLevel) + lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight); + + setContext(toContext); + + m_currentExplicitEmbeddingSequence.clear(); } template <class Iterator, class Run> @@ -312,8 +418,8 @@ void BidiResolver<Iterator, Run>::deleteRuns() Run* curr = m_firstRun; while (curr) { - Run* s = curr->m_next; - delete curr; + Run* s = curr->next(); + curr->destroy(); curr = s; } @@ -323,18 +429,18 @@ void BidiResolver<Iterator, Run>::deleteRuns() } template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::reverseRuns(int start, int end) +void BidiResolver<Iterator, Run>::reverseRuns(unsigned start, unsigned end) { if (start >= end) return; - ASSERT(start >= 0 && end < m_runCount); + ASSERT(end < m_runCount); // Get the item before the start of the runs to reverse and put it in // |beforeStart|. |curr| should point to the first run to reverse. 
Run* curr = m_firstRun; Run* beforeStart = 0; - int i = 0; + unsigned i = 0; while (i < start) { i++; beforeStart = curr; @@ -373,7 +479,7 @@ void BidiResolver<Iterator, Run>::reverseRuns(int start, int end) } template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, const Iterator& end, bool visualOrder, bool hardLineBreak) +void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, bool visualOrder, bool hardLineBreak) { using namespace WTF::Unicode; @@ -383,7 +489,6 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c eor = Iterator(); - current = start; last = current; bool pastEnd = false; BidiResolver<Iterator, Run> stateAtEnd; @@ -426,6 +531,7 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c case LeftToRightOverride: case PopDirectionalFormat: embed(dirCurrent); + commitExplicitEmbedding(); break; // strong types @@ -684,12 +790,11 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c } appendRun(); } + current = end; m_status = stateAtEnd.m_status; - current = stateAtEnd.current; sor = stateAtEnd.sor; eor = stateAtEnd.eor; last = stateAtEnd.last; - m_adjustEmbedding = stateAtEnd.m_adjustEmbedding; reachedEndOfLine = stateAtEnd.reachedEndOfLine; lastBeforeET = stateAtEnd.lastBeforeET; emptyRun = stateAtEnd.emptyRun; @@ -747,11 +852,10 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c emptyRun = false; } - // this causes the operator ++ to open and close embedding levels as needed - // for the CSS unicode-bidi property - m_adjustEmbedding = true; - current.increment(*this); - m_adjustEmbedding = false; + increment(); + if (!m_currentExplicitEmbeddingSequence.isEmpty()) + commitExplicitEmbedding(); + if (emptyRun && (dirCurrent == RightToLeftEmbedding || dirCurrent == LeftToRightEmbedding || dirCurrent == RightToLeftOverride @@ -766,12 +870,20 @@ void 
BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c if (!pastEnd && (current == end || current.atEnd())) { if (emptyRun) break; - stateAtEnd = *this; + stateAtEnd.m_status = m_status; + stateAtEnd.sor = sor; + stateAtEnd.eor = eor; + stateAtEnd.last = last; + stateAtEnd.reachedEndOfLine = reachedEndOfLine; + stateAtEnd.lastBeforeET = lastBeforeET; + stateAtEnd.emptyRun = emptyRun; endOfLine = last; pastEnd = true; } } + m_logicallyLastRun = m_lastRun; + // reorder line according to run structure... // do not reverse for visually ordered web sites if (!visualOrder) { @@ -796,22 +908,22 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, c if (!(levelLow % 2)) levelLow++; - int count = runCount() - 1; + unsigned count = runCount() - 1; while (levelHigh >= levelLow) { - int i = 0; + unsigned i = 0; Run* currRun = firstRun(); while (i < count) { while (i < count && currRun && currRun->m_level < levelHigh) { i++; currRun = currRun->next(); } - int start = i; + unsigned start = i; while (i <= count && currRun && currRun->m_level >= levelHigh) { i++; currRun = currRun->next(); } - int end = i-1; + unsigned end = i - 1; reverseRuns(start, end); } levelHigh--; diff --git a/WebCore/platform/text/CString.cpp b/WebCore/platform/text/CString.cpp index 4300b29..8e68628 100644 --- a/WebCore/platform/text/CString.cpp +++ b/WebCore/platform/text/CString.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2003, 2006, 2008 Apple Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,7 +26,8 @@ #include "config.h" #include "CString.h" -#include "DeprecatedCString.h" + +using std::min; namespace WebCore { @@ -40,17 +41,12 @@ CString::CString(const char* str, unsigned length) init(str, length); } -CString::CString(const DeprecatedCString& str) -{ - init(str.data(), str.length()); -} - void CString::init(const char* str, unsigned length) { if (!str) return; - m_buffer = new CStringBuffer(length + 1); + m_buffer = CStringBuffer::create(length + 1); memcpy(m_buffer->data(), str, length); m_buffer->data()[length] = '\0'; } @@ -72,16 +68,11 @@ unsigned CString::length() const { return m_buffer ? m_buffer->length() - 1 : 0; } - -DeprecatedCString CString::deprecatedCString() const -{ - return DeprecatedCString(data(), length() + 1); -} CString CString::newUninitialized(size_t length, char*& characterBuffer) { CString result; - result.m_buffer = new CStringBuffer(length + 1); + result.m_buffer = CStringBuffer::create(length + 1); char* bytes = result.m_buffer->data(); bytes[length] = '\0'; characterBuffer = bytes; @@ -95,7 +86,7 @@ void CString::copyBufferIfNeeded() int len = m_buffer->length(); RefPtr<CStringBuffer> m_temp = m_buffer; - m_buffer = new CStringBuffer(len); + m_buffer = CStringBuffer::create(len); memcpy(m_buffer->data(), m_temp->data(), len); } @@ -108,4 +99,17 @@ bool operator==(const CString& a, const CString& b) return !strncmp(a.data(), b.data(), min(a.length(), b.length())); } +PassRefPtr<SharedBuffer> CString::releaseBuffer() +{ + if (!m_buffer) + return 0; + + copyBufferIfNeeded(); + + RefPtr<SharedBuffer> result = m_buffer->releaseBuffer(); + m_buffer = 0; + return result.release(); +} + + } diff --git a/WebCore/platform/text/CString.h b/WebCore/platform/text/CString.h index bd1e06c..09f112f 100644 --- a/WebCore/platform/text/CString.h +++ b/WebCore/platform/text/CString.h @@ -1,5 +1,5 
@@ /* - * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2003, 2006, 2008 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,23 +26,26 @@ #ifndef CString_h #define CString_h +#include "SharedBuffer.h" + +#include <wtf/PassRefPtr.h> #include <wtf/RefCounted.h> #include <wtf/Vector.h> -using std::min; - namespace WebCore { - class DeprecatedCString; - class CStringBuffer : public RefCounted<CStringBuffer> { public: - CStringBuffer(unsigned length) : m_vector(length) { } + static PassRefPtr<CStringBuffer> create(unsigned length) { return adoptRef(new CStringBuffer(length)); } char* data() { return m_vector.data(); } - unsigned length() const { return m_vector.size(); } + size_t length() const { return m_vector.size(); } + + PassRefPtr<SharedBuffer> releaseBuffer() { return SharedBuffer::adoptVector(m_vector); } private: + CStringBuffer(unsigned length) : m_vector(length) { } + Vector<char> m_vector; }; @@ -60,9 +63,8 @@ namespace WebCore { unsigned length() const; bool isNull() const { return !m_buffer; } - - CString(const DeprecatedCString&); - DeprecatedCString deprecatedCString() const; + + PassRefPtr<SharedBuffer> releaseBuffer(); private: void copyBufferIfNeeded(); @@ -73,6 +75,6 @@ namespace WebCore { bool operator==(const CString& a, const CString& b); inline bool operator!=(const CString& a, const CString& b) { return !(a == b); } -} +} // namespace WebCore #endif // CString_h diff --git a/WebCore/platform/text/CharacterNames.h b/WebCore/platform/text/CharacterNames.h index 5b52479..f589a6c 100644 --- a/WebCore/platform/text/CharacterNames.h +++ b/WebCore/platform/text/CharacterNames.h @@ -39,6 +39,8 @@ namespace WebCore { const UChar bullet = 0x2022; const UChar horizontalEllipsis = 0x2026; const UChar ideographicSpace = 0x3000; + const UChar ideographicComma = 0x3001; + const UChar 
ideographicFullStop = 0x3002; const UChar leftToRightMark = 0x200E; const UChar leftToRightEmbed = 0x202A; const UChar leftToRightOverride = 0x202D; @@ -46,6 +48,7 @@ namespace WebCore { const UChar noBreakSpace = 0x00A0; const UChar objectReplacementCharacter = 0xFFFC; const UChar popDirectionalFormatting = 0x202C; + const UChar replacementCharacter = 0xFFFD; const UChar rightToLeftMark = 0x200F; const UChar rightToLeftEmbed = 0x202B; const UChar rightToLeftOverride = 0x202E; diff --git a/WebCore/platform/text/ParserUtilities.h b/WebCore/platform/text/ParserUtilities.h new file mode 100644 index 0000000..3105214 --- /dev/null +++ b/WebCore/platform/text/ParserUtilities.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2008 Apple Inc. All Rights Reserved. + * Copyright (C) 2002, 2003 The Karbon Developers + * Copyright (C) 2006, 2007 Rob Buis <buis@kde.org> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifndef ParserUtilities_h +#define ParserUtilities_h + +#include "PlatformString.h" + +namespace WebCore { + + inline bool skipString(const UChar*& ptr, const UChar* end, const UChar* name, int length) + { + if (end - ptr < length) + return false; + if (memcmp(name, ptr, sizeof(UChar) * length)) + return false; + ptr += length; + return true; + } + + inline bool skipString(const UChar*& ptr, const UChar* end, const char* str) + { + int length = strlen(str); + if (end - ptr < length) + return false; + for (int i = 0; i < length; ++i) { + if (ptr[i] != str[i]) + return false; + } + ptr += length; + return true; + } + +} // namspace WebCore + +#endif // ParserUtilities_h diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h index f900513..73a44bd 100644 --- a/WebCore/platform/text/PlatformString.h +++ b/WebCore/platform/text/PlatformString.h @@ -27,23 +27,34 @@ #include "StringImpl.h" -#if PLATFORM(CF) +#include <wtf/PassRefPtr.h> + +#if USE(JSC) +#include <kjs/identifier.h> +#else +// kjs/identifier.h includes HashMap.h. We explicitly include it in the case of +// non-JSC builds to keep things consistent. 
+#include <wtf/HashMap.h> +#endif + +#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) typedef const struct __CFString * CFStringRef; #endif #if PLATFORM(QT) +QT_BEGIN_NAMESPACE class QString; +QT_END_NAMESPACE #endif #if PLATFORM(WX) class wxString; #endif - namespace WebCore { class CString; -class DeprecatedString; +class SharedBuffer; struct StringHash; class String { @@ -51,19 +62,28 @@ public: String() { } // gives null string, distinguishable from an empty string String(const UChar*, unsigned length); String(const UChar*); // Specifically for null terminated UTF-16 - String(const KJS::Identifier&); - String(const KJS::UString&); +#if USE(JSC) + String(const JSC::Identifier&); + String(const JSC::UString&); +#endif String(const char*); String(const char*, unsigned length); String(StringImpl* i) : m_impl(i) { } String(PassRefPtr<StringImpl> i) : m_impl(i) { } String(RefPtr<StringImpl> i) : m_impl(i) { } + void swap(String& o) { m_impl.swap(o.m_impl); } + + // Hash table deleted values, which are only constructed and never copied or destroyed. + String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { } + bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); } + static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); } static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); } - operator KJS::Identifier() const; - operator KJS::UString() const; +#if USE(JSC) + operator JSC::UString() const; +#endif unsigned length() const; const UChar* characters() const; @@ -132,17 +152,24 @@ public: static String format(const char *, ...) 
WTF_ATTRIBUTE_PRINTF(1, 2); - Vector<String> split(const String& separator, bool allowEmptyEntries = false) const; - Vector<String> split(UChar separator, bool allowEmptyEntries = false) const; + void split(const String& separator, Vector<String>& result) const; + void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const; + void split(UChar separator, Vector<String>& result) const; + void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const; + + int toIntStrict(bool* ok = 0, int base = 10) const; + unsigned toUIntStrict(bool* ok = 0, int base = 10) const; + int64_t toInt64Strict(bool* ok = 0, int base = 10) const; + uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const; int toInt(bool* ok = 0) const; + unsigned toUInt(bool* ok = 0) const; int64_t toInt64(bool* ok = 0) const; uint64_t toUInt64(bool* ok = 0) const; double toDouble(bool* ok = 0) const; float toFloat(bool* ok = 0) const; - Length* toLengthArray(int& len) const; - Length* toCoordsArray(int& len) const; - bool percentage(int &_percentage) const; + + bool percentage(int& percentage) const; // Makes a deep copy. Helpful only if you need to use a String on another thread. // Since the underlying StringImpl objects are immutable, there's no other reason @@ -154,7 +181,7 @@ public: StringImpl* impl() const { return m_impl.get(); } -#if PLATFORM(CF) +#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) String(CFStringRef); CFStringRef createCFString() const; #endif @@ -196,10 +223,7 @@ public: // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3. WTF::Unicode::Direction defaultWritingDirection() const { return m_impl ? 
m_impl->defaultWritingDirection() : WTF::Unicode::LeftToRight; } - - String(const DeprecatedString&); - DeprecatedString deprecatedString() const; - + private: RefPtr<StringImpl> m_impl; }; @@ -222,13 +246,30 @@ inline bool equalIgnoringCase(const String& a, const String& b) { return equalIg inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); } inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); } -bool operator==(const String& a, const DeprecatedString& b); -inline bool operator==(const DeprecatedString& b, const String& a) { return a == b; } -inline bool operator!=(const String& a, const DeprecatedString& b) { return !(a == b); } -inline bool operator!=(const DeprecatedString& b, const String& a ) { return !(a == b); } - inline bool operator!(const String& str) { return str.isNull(); } +inline void swap(String& a, String& b) { a.swap(b); } + +// String Operations + +bool charactersAreAllASCII(const UChar*, size_t); + +int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); +unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); +int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); +uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); + +int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage + +double charactersToDouble(const UChar*, size_t, bool* ok = 0); +float charactersToFloat(const UChar*, size_t, bool* ok = 0); + +int find(const UChar*, size_t, UChar, int startPosition = 0); +int reverseFind(const UChar*, size_t, UChar, int startPosition = 
-1); + #ifdef __OBJC__ // This is for situations in WebKit where the long standing behavior has been // "nil if empty", so we try to maintain longstanding behavior for the sake of @@ -236,8 +277,73 @@ inline bool operator!(const String& str) { return str.isNull(); } inline NSString* nsStringNilIfEmpty(const String& str) { return str.isEmpty() ? nil : (NSString*)str; } #endif +inline bool charactersAreAllASCII(const UChar* characters, size_t length) +{ + UChar ored = 0; + for (size_t i = 0; i < length; ++i) + ored |= characters[i]; + return !(ored & 0xFF80); } +inline int find(const UChar* characters, size_t length, UChar character, int startPosition) +{ + if (startPosition >= static_cast<int>(length)) + return -1; + for (size_t i = startPosition; i < length; ++i) { + if (characters[i] == character) + return static_cast<int>(i); + } + return -1; +} + +inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition) +{ + if (startPosition >= static_cast<int>(length) || !length) + return -1; + if (startPosition < 0) + startPosition += static_cast<int>(length); + while (true) { + if (characters[startPosition] == character) + return startPosition; + if (!startPosition) + return -1; + startPosition--; + } + ASSERT_NOT_REACHED(); + return -1; +} + +inline void append(Vector<UChar>& vector, const String& string) +{ + vector.append(string.characters(), string.length()); +} + +inline void appendNumber(Vector<UChar>& vector, unsigned char number) +{ + int numberLength = number > 99 ? 3 : (number > 9 ? 
2 : 1); + size_t vectorSize = vector.size(); + vector.grow(vectorSize + numberLength); + + switch (numberLength) { + case 3: + vector[vectorSize + 2] = number % 10 + '0'; + number /= 10; + + case 2: + vector[vectorSize + 1] = number % 10 + '0'; + number /= 10; + + case 1: + vector[vectorSize] = number % 10 + '0'; + } +} + + + +PassRefPtr<SharedBuffer> utf8Buffer(const String&); + +} // namespace WebCore + namespace WTF { // StringHash is the default hash for String diff --git a/WebCore/platform/text/RegularExpression.cpp b/WebCore/platform/text/RegularExpression.cpp index 0c26d33..1b933ff 100644 --- a/WebCore/platform/text/RegularExpression.cpp +++ b/WebCore/platform/text/RegularExpression.cpp @@ -1,5 +1,6 @@ /* - * Copyright (C) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2004, 2008 Apple Inc. All rights reserved. + * Copyright (C) 2008 Collabora Ltd. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,6 +27,7 @@ #include "config.h" #include "RegularExpression.h" +#include "PlatformString.h" #include "Logging.h" #include <wtf/RefCounted.h> #include <pcre/pcre.h> @@ -36,73 +38,47 @@ namespace WebCore { const size_t maxSubstrings = 10; const size_t maxOffsets = 3 * maxSubstrings; -class RegularExpression::Private : public RefCounted<RegularExpression::Private> -{ +class RegularExpression::Private : public RefCounted<Private> { public: - Private(); - Private(DeprecatedString pattern, bool caseSensitive, bool glob); + static PassRefPtr<Private> create() { return adoptRef(new Private); } + static PassRefPtr<Private> create(const String& pattern, bool caseSensitive) { return adoptRef(new Private(pattern, caseSensitive)); } + ~Private(); - void compile(bool caseSensitive, bool glob); + void compile(bool caseSensitive); - DeprecatedString pattern; + String pattern; JSRegExp* regex; - DeprecatedString lastMatchString; + String lastMatchString; int 
lastMatchOffsets[maxOffsets]; int lastMatchCount; int lastMatchPos; int lastMatchLength; + +private: + Private(); + Private(const String& pattern, bool caseSensitive); }; -RegularExpression::Private::Private() : pattern("") +RegularExpression::Private::Private() + : pattern("") { - compile(true, false); + compile(true); } -RegularExpression::Private::Private(DeprecatedString p, bool caseSensitive, bool glob) : pattern(p), lastMatchPos(-1), lastMatchLength(-1) +RegularExpression::Private::Private(const String& p, bool caseSensitive) + : pattern(p) + , lastMatchPos(-1) + , lastMatchLength(-1) { - compile(caseSensitive, glob); + compile(caseSensitive); } -static DeprecatedString RegExpFromGlob(DeprecatedString glob) +void RegularExpression::Private::compile(bool caseSensitive) { - DeprecatedString result = glob; - - // escape regexp metacharacters which are NOT glob metacharacters - - result.replace(RegularExpression("\\\\"), "\\\\"); - result.replace(RegularExpression("\\."), "\\."); - result.replace(RegularExpression("\\+"), "\\+"); - result.replace(RegularExpression("\\$"), "\\$"); - // FIXME: incorrect for ^ inside bracket group - result.replace(RegularExpression("\\^"), "\\^"); - - // translate glob metacharacters into regexp metacharacters - result.replace(RegularExpression("\\*"), ".*"); - result.replace(RegularExpression("\\?"), "."); - - // Require the glob to match the whole string - result = "^" + result + "$"; - - return result; -} - -void RegularExpression::Private::compile(bool caseSensitive, bool glob) -{ - DeprecatedString p; - - if (glob) { - p = RegExpFromGlob(pattern); - } else { - p = pattern; - } - // Note we don't honor the Qt syntax for various character classes. If we convert - // to a different underlying engine, we may need to change client code that relies - // on the regex syntax (see FrameMac.mm for a couple examples). 
- const char* errorMessage; - regex = jsRegExpCompile(reinterpret_cast<const UChar*>(p.unicode()), p.length(), + regex = jsRegExpCompile(pattern.characters(), pattern.length(), caseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine, 0, &errorMessage); if (!regex) @@ -115,20 +91,24 @@ RegularExpression::Private::~Private() } -RegularExpression::RegularExpression() : d(new RegularExpression::Private()) +RegularExpression::RegularExpression() + : d(Private::create()) { } -RegularExpression::RegularExpression(const DeprecatedString &pattern, bool caseSensitive, bool glob) : d(new RegularExpression::Private(pattern, caseSensitive, glob)) +RegularExpression::RegularExpression(const String& pattern, bool caseSensitive) + : d(Private::create(pattern, caseSensitive)) { } -RegularExpression::RegularExpression(const char *cpattern) : d(new RegularExpression::Private(cpattern, true, false)) +RegularExpression::RegularExpression(const char* pattern) + : d(Private::create(pattern, true)) { } -RegularExpression::RegularExpression(const RegularExpression &re) : d (re.d) +RegularExpression::RegularExpression(const RegularExpression& re) + : d(re.d) { } @@ -136,57 +116,54 @@ RegularExpression::~RegularExpression() { } -RegularExpression &RegularExpression::operator=(const RegularExpression &re) +RegularExpression& RegularExpression::operator=(const RegularExpression& re) { RegularExpression tmp(re); - RefPtr<RegularExpression::Private> tmpD = tmp.d; - - tmp.d = d; - d = tmpD; - + tmp.d.swap(d); return *this; } -DeprecatedString RegularExpression::pattern() const +String RegularExpression::pattern() const { return d->pattern; } -int RegularExpression::match(const DeprecatedString &str, int startFrom, int *matchLength) const +int RegularExpression::match(const String& str, int startFrom, int* matchLength) const { + if (str.isNull()) + return -1; + d->lastMatchString = str; // First 2 offsets are start and end offsets; 3rd entry is used internally by pcre - 
d->lastMatchCount = jsRegExpExecute(d->regex, reinterpret_cast<const UChar*>(d->lastMatchString.unicode()), d->lastMatchString.length(), startFrom, d->lastMatchOffsets, maxOffsets); + d->lastMatchCount = jsRegExpExecute(d->regex, d->lastMatchString.characters(), + d->lastMatchString.length(), startFrom, d->lastMatchOffsets, maxOffsets); if (d->lastMatchCount < 0) { if (d->lastMatchCount != JSRegExpErrorNoMatch) LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", d->lastMatchCount); d->lastMatchPos = -1; d->lastMatchLength = -1; - d->lastMatchString = DeprecatedString(); + d->lastMatchString = String(); return -1; } // 1 means 1 match; 0 means more than one match. First match is recorded in offsets. - //ASSERT(d->lastMatchCount < 2); d->lastMatchPos = d->lastMatchOffsets[0]; d->lastMatchLength = d->lastMatchOffsets[1] - d->lastMatchOffsets[0]; - if (matchLength != NULL) { + if (matchLength) *matchLength = d->lastMatchLength; - } return d->lastMatchPos; } -int RegularExpression::search(const DeprecatedString &str, int startFrom) const +int RegularExpression::search(const String& str, int startFrom) const { - if (startFrom < 0) { + if (startFrom < 0) startFrom = str.length() - startFrom; - } - return match(str, startFrom, NULL); + return match(str, startFrom, 0); } -int RegularExpression::searchRev(const DeprecatedString &str) const +int RegularExpression::searchRev(const String& str) const { - // FIXME: Total hack for now. Search forward, return the last, greedy match + // FIXME: Total hack for now. 
Search forward, return the last, greedy match int start = 0; int pos; int lastPos = -1; @@ -195,7 +172,7 @@ int RegularExpression::searchRev(const DeprecatedString &str) const int matchLength; pos = match(str, start, &matchLength); if (pos >= 0) { - if ((pos+matchLength) > (lastPos+lastMatchLength)) { + if (pos + matchLength > lastPos + lastMatchLength) { // replace last match if this one is later and not a subset of the last match lastPos = pos; lastMatchLength = matchLength; @@ -219,4 +196,19 @@ int RegularExpression::matchedLength() const return d->lastMatchLength; } +void replace(String& string, const RegularExpression& target, const String& replacement) +{ + int index = 0; + while (index < static_cast<int>(string.length())) { + int matchLength; + index = target.match(string, index, &matchLength); + if (index < 0) + break; + string.replace(index, matchLength, replacement); + index += replacement.length(); + if (!matchLength) + break; // Avoid infinite loop on 0-length matches, e.g. [a-z]* + } } + +} // namespace WebCore diff --git a/WebCore/platform/text/RegularExpression.h b/WebCore/platform/text/RegularExpression.h index ec1cdef..5d1991e 100644 --- a/WebCore/platform/text/RegularExpression.h +++ b/WebCore/platform/text/RegularExpression.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2003, 2008 Apple Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,34 +26,38 @@ #ifndef RegularExpression_h #define RegularExpression_h -#include "DeprecatedString.h" +#include <wtf/RefPtr.h> namespace WebCore { +class String; + class RegularExpression { public: RegularExpression(); - RegularExpression(const DeprecatedString &, bool caseSensitive = false, bool glob = false); - RegularExpression(const char *); + RegularExpression(const String&, bool caseSensitive = false); + RegularExpression(const char*); ~RegularExpression(); - RegularExpression(const RegularExpression &); - RegularExpression &operator=(const RegularExpression &); + RegularExpression(const RegularExpression&); + RegularExpression& operator=(const RegularExpression&); - DeprecatedString pattern() const; - int match(const DeprecatedString &, int startFrom = 0, int *matchLength = 0) const; + String pattern() const; + int match(const String&, int startFrom = 0, int* matchLength = 0) const; - int search(const DeprecatedString &, int startFrom = 0) const; - int searchRev(const DeprecatedString &) const; + int search(const String&, int startFrom = 0) const; + int searchRev(const String&) const; int pos(int n = 0); int matchedLength() const; - + private: class Private; RefPtr<Private> d; }; -} +void replace(String&, const RegularExpression&, const String&); + +} // namespace WebCore -#endif +#endif // RegularExpression_h diff --git a/WebCore/platform/text/SegmentedString.cpp b/WebCore/platform/text/SegmentedString.cpp index 0b3c7e9..9f5eb26 100644 --- a/WebCore/platform/text/SegmentedString.cpp +++ b/WebCore/platform/text/SegmentedString.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. + Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 
This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public @@ -59,10 +59,10 @@ unsigned SegmentedString::length() const ++length; } if (m_composite) { - DeprecatedValueListConstIterator<SegmentedSubstring> i = m_substrings.begin(); - DeprecatedValueListConstIterator<SegmentedSubstring> e = m_substrings.end(); - for (; i != e; ++i) - length += (*i).m_length; + Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); + Deque<SegmentedSubstring>::const_iterator e = m_substrings.end(); + for (; it != e; ++it) + length += it->m_length; } return length; } @@ -70,10 +70,10 @@ unsigned SegmentedString::length() const void SegmentedString::setExcludeLineNumbers() { if (m_composite) { - DeprecatedValueListIterator<SegmentedSubstring> i = m_substrings.begin(); - DeprecatedValueListIterator<SegmentedSubstring> e = m_substrings.end(); - for (; i != e; ++i) - (*i).setExcludeLineNumbers(); + Deque<SegmentedSubstring>::iterator it = m_substrings.begin(); + Deque<SegmentedSubstring>::iterator e = m_substrings.end(); + for (; it != e; ++it) + it->setExcludeLineNumbers(); } else m_currentString.setExcludeLineNumbers(); } @@ -120,10 +120,10 @@ void SegmentedString::append(const SegmentedString &s) ASSERT(!s.escaped()); append(s.m_currentString); if (s.m_composite) { - DeprecatedValueListConstIterator<SegmentedSubstring> i = s.m_substrings.begin(); - DeprecatedValueListConstIterator<SegmentedSubstring> e = s.m_substrings.end(); - for (; i != e; ++i) - append(*i); + Deque<SegmentedSubstring>::const_iterator it = s.m_substrings.begin(); + Deque<SegmentedSubstring>::const_iterator e = s.m_substrings.end(); + for (; it != e; ++it) + append(*it); } m_currentChar = m_pushedChar1 ? 
&m_pushedChar1 : m_currentString.m_current; } @@ -133,10 +133,10 @@ void SegmentedString::prepend(const SegmentedString &s) ASSERT(!escaped()); ASSERT(!s.escaped()); if (s.m_composite) { - DeprecatedValueListConstIterator<SegmentedSubstring> i = s.m_substrings.fromLast(); - DeprecatedValueListConstIterator<SegmentedSubstring> e = s.m_substrings.end(); - for (; i != e; --i) - prepend(*i); + Deque<SegmentedSubstring>::const_reverse_iterator it = s.m_substrings.rbegin(); + Deque<SegmentedSubstring>::const_reverse_iterator e = s.m_substrings.rend(); + for (; it != e; ++it) + prepend(*it); } prepend(s.m_currentString); m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current; @@ -146,7 +146,7 @@ void SegmentedString::advanceSubstring() { if (m_composite) { m_currentString = m_substrings.first(); - m_substrings.remove(m_substrings.begin()); + m_substrings.removeFirst(); if (m_substrings.isEmpty()) m_composite = false; } else { @@ -164,10 +164,10 @@ String SegmentedString::toString() const } m_currentString.appendTo(result); if (m_composite) { - DeprecatedValueListConstIterator<SegmentedSubstring> i = m_substrings.begin(); - DeprecatedValueListConstIterator<SegmentedSubstring> e = m_substrings.end(); - for (; i != e; ++i) - (*i).appendTo(result); + Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); + Deque<SegmentedSubstring>::const_iterator e = m_substrings.end(); + for (; it != e; ++it) + it->appendTo(result); } return result; } diff --git a/WebCore/platform/text/SegmentedString.h b/WebCore/platform/text/SegmentedString.h index 52178d3..79ed1f0 100644 --- a/WebCore/platform/text/SegmentedString.h +++ b/WebCore/platform/text/SegmentedString.h @@ -20,20 +20,22 @@ #ifndef SegmentedString_h #define SegmentedString_h -#include "DeprecatedValueList.h" #include "PlatformString.h" +#include <wtf/Deque.h> namespace WebCore { class SegmentedString; class SegmentedSubstring { -private: - friend class SegmentedString; - +public: 
SegmentedSubstring() : m_length(0), m_current(0), m_doNotExcludeLineNumbers(true) {} - SegmentedSubstring(const String& str) : m_string(str), m_length(str.length()), m_doNotExcludeLineNumbers(true) { - m_current = m_length == 0 ? 0 : m_string.characters(); + SegmentedSubstring(const String& str) + : m_length(str.length()) + , m_current(str.isEmpty() ? 0 : str.characters()) + , m_string(str) + , m_doNotExcludeLineNumbers(true) + { } SegmentedSubstring(const UChar* str, int length) : m_length(length), m_current(length == 0 ? 0 : str), m_doNotExcludeLineNumbers(true) {} @@ -45,7 +47,8 @@ private: void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } - void appendTo(String& str) const { + void appendTo(String& str) const + { if (m_string.characters() == m_current) { if (str.isEmpty()) str = m_string; @@ -56,9 +59,12 @@ private: } } - String m_string; +public: int m_length; const UChar* m_current; + +private: + String m_string; bool m_doNotExcludeLineNumbers; }; @@ -77,8 +83,8 @@ public: void clear(); - void append(const SegmentedString &); - void prepend(const SegmentedString &); + void append(const SegmentedString&); + void prepend(const SegmentedString&); bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); } void setExcludeLineNumbers(); @@ -149,8 +155,8 @@ public: const UChar* operator->() const { return current(); } private: - void append(const SegmentedSubstring &); - void prepend(const SegmentedSubstring &); + void append(const SegmentedSubstring&); + void prepend(const SegmentedSubstring&); void advanceSlowCase(); void advanceSlowCase(int& lineNumber); @@ -161,7 +167,7 @@ private: UChar m_pushedChar2; SegmentedSubstring m_currentString; const UChar* m_currentChar; - DeprecatedValueList<SegmentedSubstring> m_substrings; + Deque<SegmentedSubstring> m_substrings; bool m_composite; }; diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp index 967e7c8..44500e1 100644 --- 
a/WebCore/platform/text/String.cpp +++ b/WebCore/platform/text/String.cpp @@ -22,16 +22,25 @@ #include "PlatformString.h" #include "CString.h" -#include "DeprecatedString.h" +#include "FloatConversion.h" #include "StringBuffer.h" #include "TextEncoding.h" -#include <kjs/identifier.h> +#include <kjs/dtoa.h> +#include <limits> +#include <stdarg.h> +#include <wtf/ASCIICType.h> #include <wtf/StringExtras.h> #include <wtf/Vector.h> -#include <stdarg.h> +#include <wtf/unicode/Unicode.h> +#include <wtf/unicode/UTF8.h> + +#if USE(JSC) +using JSC::Identifier; +using JSC::UString; +#endif -using KJS::Identifier; -using KJS::UString; +using namespace WTF; +using namespace WTF::Unicode; namespace WebCore { @@ -54,13 +63,6 @@ String::String(const UChar* str) m_impl = StringImpl::create(str, len); } -String::String(const DeprecatedString& str) -{ - if (str.isNull()) - return; - m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.unicode()), str.length()); -} - String::String(const char* str) { if (!str) @@ -289,7 +291,7 @@ bool String::percentage(int& result) const if ((*m_impl)[m_impl->length() - 1] != '%') return false; - result = DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_impl->characters()), m_impl->length() - 1).string().toInt(); + result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1); return true; } @@ -310,17 +312,21 @@ const UChar* String::charactersWithNullTermination() return m_impl->characters(); } -DeprecatedString String::deprecatedString() const -{ - if (!m_impl) - return DeprecatedString::null; - if (!m_impl->characters()) - return DeprecatedString("", 0); - return DeprecatedString(reinterpret_cast<const DeprecatedChar*>(m_impl->characters()), m_impl->length()); -} - String String::format(const char *format, ...) { +#if PLATFORM(QT) + // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf. 
+ // https://bugs.webkit.org/show_bug.cgi?id=18994 + va_list args; + va_start(args, format); + + QString buffer; + buffer.vsprintf(format, args); + + va_end(args); + + return buffer; +#else va_list args; va_start(args, format); @@ -355,6 +361,7 @@ String String::format(const char *format, ...) va_end(args); return StringImpl::create(buffer.data(), len); +#endif } String String::number(int n) @@ -400,6 +407,46 @@ String String::number(double n) return String::format("%.6lg", n); } +int String::toIntStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntStrict(ok, base); +} + +unsigned String::toUIntStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUIntStrict(ok, base); +} + +int64_t String::toInt64Strict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt64Strict(ok, base); +} + +uint64_t String::toUInt64Strict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt64Strict(ok, base); +} + int String::toInt(bool* ok) const { if (!m_impl) { @@ -410,6 +457,16 @@ int String::toInt(bool* ok) const return m_impl->toInt(ok); } +unsigned String::toUInt(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt(ok); +} + int64_t String::toInt64(bool* ok) const { if (!m_impl) { @@ -435,7 +492,7 @@ double String::toDouble(bool* ok) const if (!m_impl) { if (ok) *ok = false; - return 0; + return 0.0; } return m_impl->toDouble(ok); } @@ -462,38 +519,44 @@ bool String::isEmpty() const return !m_impl || !m_impl->length(); } -Length* String::toCoordsArray(int& len) const +void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const { - return m_impl ? 
m_impl->toCoordsArray(len) : 0; + result.clear(); + + int startPos = 0; + int endPos; + while ((endPos = find(separator, startPos)) != -1) { + if (allowEmptyEntries || startPos != endPos) + result.append(substring(startPos, endPos - startPos)); + startPos = endPos + separator.length(); + } + if (allowEmptyEntries || startPos != static_cast<int>(length())) + result.append(substring(startPos)); } -Length* String::toLengthArray(int& len) const +void String::split(const String& separator, Vector<String>& result) const { - return m_impl ? m_impl->toLengthArray(len) : 0; + return split(separator, false, result); } -Vector<String> String::split(const String& separator, bool allowEmptyEntries) const +void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const { - Vector<String> result; - + result.clear(); + int startPos = 0; int endPos; while ((endPos = find(separator, startPos)) != -1) { if (allowEmptyEntries || startPos != endPos) result.append(substring(startPos, endPos - startPos)); - startPos = endPos + separator.length(); + startPos = endPos + 1; } - if (allowEmptyEntries || startPos != (int)length()) + if (allowEmptyEntries || startPos != static_cast<int>(length())) result.append(substring(startPos)); - - return result; } -Vector<String> String::split(UChar separator, bool allowEmptyEntries) const +void String::split(UChar separator, Vector<String>& result) const { - Vector<String> result; - - return split(String(&separator, 1), allowEmptyEntries); + return split(String(&separator, 1), false, result); } #ifndef NDEBUG @@ -514,61 +577,247 @@ Vector<char> String::ascii() const CString String::latin1() const { - return Latin1Encoding().encode(characters(), length()); + return Latin1Encoding().encode(characters(), length(), QuestionMarksForUnencodables); } CString String::utf8() const { - return UTF8Encoding().encode(characters(), length()); + return UTF8Encoding().encode(characters(), length(), QuestionMarksForUnencodables); } String 
String::fromUTF8(const char* string, size_t size) { + if (!string) + return String(); return UTF8Encoding().decode(string, size); } String String::fromUTF8(const char* string) { + if (!string) + return String(); return UTF8Encoding().decode(string, strlen(string)); } - -bool operator==(const String& a, const DeprecatedString& b) -{ - unsigned l = a.length(); - if (l != b.length()) - return false; - if (!memcmp(a.characters(), b.unicode(), l * sizeof(UChar))) - return true; - return false; -} - +#if USE(JSC) String::String(const Identifier& str) { if (str.isNull()) return; - m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.data()), str.size()); + m_impl = StringImpl::create(str.data(), str.size()); } String::String(const UString& str) { if (str.isNull()) return; - m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.data()), str.size()); + m_impl = StringImpl::create(str.data(), str.size()); } -String::operator Identifier() const +String::operator UString() const { if (!m_impl) - return Identifier(); - return Identifier(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length()); + return UString(); + return UString(m_impl->characters(), m_impl->length()); } +#endif -String::operator UString() const +// String Operations + +static bool isCharacterAllowedInBase(UChar c, int base) { - if (!m_impl) - return UString(); - return UString(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length()); + if (c > 0x7F) + return false; + if (isASCIIDigit(c)) + return c - '0' < base; + if (isASCIIAlpha(c)) { + if (base > 36) + base = 36; + return (c >= 'a' && c < 'a' + base - 10) + || (c >= 'A' && c < 'A' + base - 10); + } + return false; +} + +template <typename IntegralType> +static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base) +{ + static const IntegralType integralMax = std::numeric_limits<IntegralType>::max(); + static const bool isSigned = 
std::numeric_limits<IntegralType>::is_signed; + const IntegralType maxMultiplier = integralMax / base; + + IntegralType value = 0; + bool isOk = false; + bool isNegative = false; + + if (!data) + goto bye; + + // skip leading whitespace + while (length && isSpaceOrNewline(*data)) { + length--; + data++; + } + + if (isSigned && length && *data == '-') { + length--; + data++; + isNegative = true; + } else if (length && *data == '+') { + length--; + data++; + } + + if (!length || !isCharacterAllowedInBase(*data, base)) + goto bye; + + while (length && isCharacterAllowedInBase(*data, base)) { + length--; + IntegralType digitValue; + UChar c = *data; + if (isASCIIDigit(c)) + digitValue = c - '0'; + else if (c >= 'a') + digitValue = c - 'a' + 10; + else + digitValue = c - 'A' + 10; + + if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) + goto bye; + + value = base * value + digitValue; + data++; + } + +#if COMPILER(MSVC) +#pragma warning(push, 0) +#pragma warning(disable:4146) +#endif + + if (isNegative) + value = -value; + +#if COMPILER(MSVC) +#pragma warning(pop) +#endif + + // skip trailing space + while (length && isSpaceOrNewline(*data)) { + length--; + data++; + } + + if (!length) + isOk = true; +bye: + if (ok) + *ok = isOk; + return isOk ? value : 0; +} + +static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length) +{ + size_t i = 0; + + // Allow leading spaces. + for (; i != length; ++i) { + if (!isSpaceOrNewline(data[i])) + break; + } + + // Allow sign. + if (i != length && (data[i] == '+' || data[i] == '-')) + ++i; + + // Allow digits. 
+ for (; i != length; ++i) { + if (!isASCIIDigit(data[i])) + break; + } + + return i; +} + +int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<int>(data, length, ok, base); +} + +unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<unsigned>(data, length, ok, base); +} + +int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<int64_t>(data, length, ok, base); +} + +uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<uint64_t>(data, length, ok, base); +} + +int charactersToInt(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +double charactersToDouble(const UChar* data, size_t length, bool* ok) +{ + if (!length) { + if (ok) + *ok = false; + return 0.0; + } + + Vector<char, 256> bytes(length + 1); + for (unsigned i = 0; i < length; ++i) + bytes[i] = data[i] < 0x7F ? data[i] : '?'; + bytes[length] = '\0'; + char* end; + double val = JSC::strtod(bytes.data(), &end); + if (ok) + *ok = (end == 0 || *end == '\0'); + return val; +} + +float charactersToFloat(const UChar* data, size_t length, bool* ok) +{ + // FIXME: This will return ok even when the string fits into a double but not a float. 
+ return narrowPrecisionToFloat(charactersToDouble(data, length, ok)); +} + +PassRefPtr<SharedBuffer> utf8Buffer(const String& string) +{ + // Allocate a buffer big enough to hold all the characters. + const int length = string.length(); + Vector<char> buffer(length * 3); + + // Convert to runs of 8-bit characters. + char* p = buffer.data(); + const UChar* d = string.characters(); + ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), true); + if (result != conversionOK) + return 0; + + buffer.shrink(p - buffer.data()); + return SharedBuffer::adoptVector(buffer); } } // namespace WebCore diff --git a/WebCore/platform/text/StringBuilder.cpp b/WebCore/platform/text/StringBuilder.cpp new file mode 100644 index 0000000..0e9555c --- /dev/null +++ b/WebCore/platform/text/StringBuilder.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "StringBuilder.h" + +#include "StringBuffer.h" + +namespace WebCore { + +void StringBuilder::append(const String& string) +{ + if (string.isNull()) + return; + + if (m_totalLength == UINT_MAX) + m_totalLength = string.length(); + else + m_totalLength += string.length(); + + if (!string.isEmpty()) + m_strings.append(string); +} + +void StringBuilder::append(UChar c) +{ + if (m_totalLength == UINT_MAX) + m_totalLength = 1; + else + m_totalLength += 1; + + m_strings.append(String(&c, 1)); +} + +void StringBuilder::append(char c) +{ + if (m_totalLength == UINT_MAX) + m_totalLength = 1; + else + m_totalLength += 1; + + m_strings.append(String(&c, 1)); +} + +String StringBuilder::toString() const +{ + if (isNull()) + return String(); + + unsigned count = m_strings.size(); + + if (!count) + return String(StringImpl::empty()); + if (count == 1) + return m_strings[0]; + + StringBuffer buffer(m_totalLength); + + UChar* p = buffer.characters(); + for (unsigned i = 0; i < count; ++i) { + StringImpl* string = m_strings[i].impl(); + unsigned length = string->length(); + memcpy(p, string->characters(), length * 2); + p += length; + } + + ASSERT(p == m_totalLength + buffer.characters()); + + return String::adopt(buffer); +} + +} diff --git a/WebCore/platform/text/StringBuilder.h b/WebCore/platform/text/StringBuilder.h new file mode 100644 index 0000000..8d76b9c --- /dev/null +++ b/WebCore/platform/text/StringBuilder.h @@ -0,0 
+1,57 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef StringBuilder_h +#define StringBuilder_h + +#include "PlatformString.h" + +namespace WebCore { + + class StringBuilder { + public: + StringBuilder() : m_totalLength(UINT_MAX) {} + + void setNonNull() { if (m_totalLength == UINT_MAX) m_totalLength = 0; } + + void append(const String&); + void append(UChar); + void append(char); + + String toString() const; + + private: + bool isNull() const { return m_totalLength == UINT_MAX; } + + unsigned m_totalLength; + Vector<String, 16> m_strings; + }; + +} + +#endif diff --git a/WebCore/platform/text/StringHash.h b/WebCore/platform/text/StringHash.h index 375b2e4..c6e08a6 100644 --- a/WebCore/platform/text/StringHash.h +++ b/WebCore/platform/text/StringHash.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006, 2007 Apple Inc. All rights reserved + * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -28,6 +28,10 @@ namespace WebCore { + // FIXME: We should really figure out a way to put the computeHash function that's + // currently a member function of StringImpl into this file so we can be a little + // closer to having all the nearly-identical hash functions in one place. + struct StringHash { static unsigned hash(StringImpl* key) { return key->hash(); } static bool equal(StringImpl* a, StringImpl* b) @@ -78,17 +82,17 @@ namespace WebCore { public: // Paul Hsieh's SuperFastHash // http://www.azillionmonkeys.com/qed/hash.html - static unsigned hash(StringImpl* str) + static unsigned hash(const UChar* data, unsigned length) { - unsigned l = str->length(); - const UChar* s = str->characters(); + unsigned l = length; + const UChar* s = data; uint32_t hash = PHI; uint32_t tmp; int rem = l & 1; l >>= 1; - // Main loop + // Main loop. 
for (; l > 0; l--) { hash += WTF::Unicode::foldCase(s[0]); tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; @@ -97,28 +101,32 @@ namespace WebCore { hash += hash >> 11; } - // Handle end case + // Handle end case. if (rem) { hash += WTF::Unicode::foldCase(s[0]); hash ^= hash << 11; hash += hash >> 17; } - // Force "avalanching" of final 127 bits + // Force "avalanching" of final 127 bits. hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 2; hash += hash >> 15; hash ^= hash << 10; - // this avoids ever returning a hash code of 0, since that is used to + // This avoids ever returning a hash code of 0, since that is used to // signal "hash not computed yet", using a value that is likely to be - // effectively the same as 0 when the low bits are masked - if (hash == 0) - hash = 0x80000000; + // effectively the same as 0 when the low bits are masked. + hash |= !hash << 31; return hash; } + + static unsigned hash(StringImpl* str) + { + return hash(str->characters(), str->length()); + } static unsigned hash(const char* str, unsigned length) { @@ -160,8 +168,7 @@ namespace WebCore { // this avoids ever returning a hash code of 0, since that is used to // signal "hash not computed yet", using a value that is likely to be // effectively the same as 0 when the low bits are masked - if (hash == 0) - hash = 0x80000000; + hash |= !hash << 31; return hash; } @@ -200,48 +207,34 @@ namespace WebCore { static const bool safeToCompareToEmptyOrDeleted = false; }; + // This hash can be used in cases where the key is a hash of a string, but we don't + // want to store the string. It's not really specific to string hashing, but all our + // current uses of it are for strings. + struct AlreadyHashed : IntHash<unsigned> { + static unsigned hash(unsigned key) { return key; } + + // To use a hash value as a key for a hash table, we need to eliminate the + // "deleted" value, which is negative one. 
That could be done by changing + // the string hash function to never generate negative one, but this works + // and is still relatively efficient. + static unsigned avoidDeletedValue(unsigned hash) + { + ASSERT(hash); + unsigned newHash = hash | (!(hash + 1) << 31); + ASSERT(newHash); + ASSERT(newHash != 0xFFFFFFFF); + return newHash; + } + }; + } namespace WTF { - // store WebCore::String as StringImpl* - template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> { - typedef HashTraits<WebCore::StringImpl*>::StorageTraits StorageTraits; - typedef StorageTraits::TraitType StorageType; static const bool emptyValueIsZero = true; - static const bool needsRef = true; - - typedef union { - WebCore::StringImpl* m_p; - StorageType m_s; - } UnionType; - - static void ref(const StorageType& s) { ref(reinterpret_cast<const UnionType*>(&s)->m_p); } - static void deref(const StorageType& s) { deref(reinterpret_cast<const UnionType*>(&s)->m_p); } - - static void ref(const WebCore::StringImpl* str) { if (str) const_cast<WebCore::StringImpl*>(str)->ref(); } - static void deref(const WebCore::StringImpl* str) { if (str) const_cast<WebCore::StringImpl*>(str)->deref(); } - }; - - // share code between StringImpl*, RefPtr<StringImpl>, and String - - template<> struct HashKeyStorageTraits<WebCore::StringHash, HashTraits<RefPtr<WebCore::StringImpl> > > { - typedef WebCore::StringHash Hash; - typedef HashTraits<WebCore::StringImpl*> Traits; - }; - template<> struct HashKeyStorageTraits<WebCore::StringHash, HashTraits<WebCore::String> > { - typedef WebCore::StringHash Hash; - typedef HashTraits<WebCore::StringImpl*> Traits; - }; - - template<> struct HashKeyStorageTraits<WebCore::CaseFoldingHash, HashTraits<RefPtr<WebCore::StringImpl> > > { - typedef WebCore::CaseFoldingHash Hash; - typedef HashTraits<WebCore::StringImpl*> Traits; - }; - template<> struct HashKeyStorageTraits<WebCore::CaseFoldingHash, HashTraits<WebCore::String> > { - typedef 
WebCore::CaseFoldingHash Hash; - typedef HashTraits<WebCore::StringImpl*> Traits; + static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); } + static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); } }; } diff --git a/WebCore/platform/text/StringImpl.cpp b/WebCore/platform/text/StringImpl.cpp index 0643de6..911c0dc 100644 --- a/WebCore/platform/text/StringImpl.cpp +++ b/WebCore/platform/text/StringImpl.cpp @@ -28,24 +28,18 @@ #include "AtomicString.h" #include "CString.h" #include "CharacterNames.h" -#include "DeprecatedString.h" #include "FloatConversion.h" -#include "Length.h" #include "StringBuffer.h" #include "StringHash.h" #include "TextBreakIterator.h" #include "TextEncoding.h" #include <kjs/dtoa.h> -#include <kjs/identifier.h> #include <wtf/Assertions.h> #include <wtf/unicode/Unicode.h> using namespace WTF; using namespace Unicode; -using KJS::Identifier; -using KJS::UString; - namespace WebCore { static inline UChar* newUCharVector(unsigned n) @@ -60,21 +54,23 @@ static inline void deleteUCharVector(const UChar* p) // This constructor is used only to create the empty string. StringImpl::StringImpl() - : RefCounted<StringImpl>(1) - , m_length(0) + : m_length(0) , m_data(0) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) { + // Ensure that the hash is computed so that AtomicStringHash can call existingHash() + // with impunity. The empty string is special because it is never entered into + // AtomicString's HashKey, but still needs to compare correctly. + hash(); } // This is one of the most common constructors, but it's also used for the copy() // operation. Because of that, it's the one constructor that doesn't assert the // length is non-zero, since we support copying the empty string. 
inline StringImpl::StringImpl(const UChar* characters, unsigned length) - : RefCounted<StringImpl>(1) - , m_length(length) + : m_length(length) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) @@ -85,8 +81,7 @@ inline StringImpl::StringImpl(const UChar* characters, unsigned length) } inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter) - : RefCounted<StringImpl>(1) - , m_length(str.m_length) + : m_length(str.m_length) , m_hash(str.m_hash) , m_inTable(false) , m_hasTerminatingNullCharacter(true) @@ -98,8 +93,7 @@ inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacte } inline StringImpl::StringImpl(const char* characters, unsigned length) - : RefCounted<StringImpl>(1) - , m_length(length) + : m_length(length) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) @@ -116,8 +110,7 @@ inline StringImpl::StringImpl(const char* characters, unsigned length) } inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer) - : RefCounted<StringImpl>(1) - , m_length(length) + : m_length(length) , m_data(characters) , m_hash(0) , m_inTable(false) @@ -127,15 +120,9 @@ inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer) ASSERT(length); } -// FIXME: These AtomicString constructors return objects with a refCount of 0, -// even though the others return objects with a refCount of 1. That preserves -// the historical behavior for the hash map translator call sites inside the -// AtomicString code, but is it correct? - // This constructor is only for use by AtomicString. StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash) - : RefCounted<StringImpl>(0) - , m_length(length) + : m_length(length) , m_hash(hash) , m_inTable(true) , m_hasTerminatingNullCharacter(false) @@ -151,8 +138,7 @@ StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash) // This constructor is only for use by AtomicString. 
StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash) - : RefCounted<StringImpl>(0) - , m_length(length) + : m_length(length) , m_hash(hash) , m_inTable(true) , m_hasTerminatingNullCharacter(false) @@ -178,8 +164,8 @@ StringImpl::~StringImpl() StringImpl* StringImpl::empty() { - static StringImpl e; - return &e; + static StringImpl* e = new StringImpl; + return e; } bool StringImpl::containsOnlyWhitespace() @@ -211,128 +197,6 @@ UChar32 StringImpl::characterStartingAt(unsigned i) return 0; } -static Length parseLength(const UChar* data, unsigned length) -{ - if (length == 0) - return Length(1, Relative); - - unsigned i = 0; - while (i < length && isSpaceOrNewline(data[i])) - ++i; - if (i < length && (data[i] == '+' || data[i] == '-')) - ++i; - while (i < length && Unicode::isDigit(data[i])) - ++i; - - bool ok; - int r = DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(data), i).string().toInt(&ok); - - /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */ - while (i < length && (Unicode::isDigit(data[i]) || data[i] == '.')) - ++i; - - /* IE Quirk: Skip any whitespace (20 % => 20%) */ - while (i < length && isSpaceOrNewline(data[i])) - ++i; - - if (ok) { - if (i < length) { - UChar next = data[i]; - if (next == '%') - return Length(static_cast<double>(r), Percent); - if (next == '*') - return Length(r, Relative); - } - return Length(r, Fixed); - } else { - if (i < length) { - UChar next = data[i]; - if (next == '*') - return Length(1, Relative); - if (next == '%') - return Length(1, Relative); - } - } - return Length(0, Relative); -} - -Length StringImpl::toLength() -{ - return parseLength(m_data, m_length); -} - -static int countCharacter(StringImpl* string, UChar character) -{ - int count = 0; - int length = string->length(); - for (int i = 0; i < length; ++i) - count += (*string)[i] == character; - return count; -} - -Length* StringImpl::toCoordsArray(int& len) -{ - StringBuffer spacified(m_length); - 
for (unsigned i = 0; i < m_length; i++) { - UChar cc = m_data[i]; - if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.')) - spacified[i] = ' '; - else - spacified[i] = cc; - } - RefPtr<StringImpl> str = adopt(spacified); - - str = str->simplifyWhiteSpace(); - - len = countCharacter(str.get(), ' ') + 1; - Length* r = new Length[len]; - - int i = 0; - int pos = 0; - int pos2; - - while ((pos2 = str->find(' ', pos)) != -1) { - r[i++] = parseLength(str->characters() + pos, pos2 - pos); - pos = pos2+1; - } - r[i] = parseLength(str->characters() + pos, str->length() - pos); - - ASSERT(i == len - 1); - - return r; -} - -Length* StringImpl::toLengthArray(int& len) -{ - RefPtr<StringImpl> str = simplifyWhiteSpace(); - if (!str->length()) { - len = 1; - return 0; - } - - len = countCharacter(str.get(), ',') + 1; - Length* r = new Length[len]; - - int i = 0; - int pos = 0; - int pos2; - - while ((pos2 = str->find(',', pos)) != -1) { - r[i++] = parseLength(str->characters() + pos, pos2 - pos); - pos = pos2+1; - } - - ASSERT(i == len - 1); - - /* IE Quirk: If the last comma is the last char skip it and reduce len by one */ - if (str->length()-pos > 0) - r[i] = parseLength(str->characters() + pos, str->length() - pos); - else - len--; - - return r; -} - bool StringImpl::isLower() { // Do a faster loop for the case where all the characters are ASCII. @@ -378,7 +242,7 @@ PassRefPtr<StringImpl> StringImpl::lower() if (!error && realLength == length) return adopt(data); data.resize(realLength); - Unicode::toLower(data.characters(), length, m_data, m_length, &error); + Unicode::toLower(data.characters(), realLength, m_data, m_length, &error); if (error) return this; return adopt(data); @@ -386,10 +250,26 @@ PassRefPtr<StringImpl> StringImpl::lower() PassRefPtr<StringImpl> StringImpl::upper() { + StringBuffer data(m_length); + int32_t length = m_length; + + // Do a faster loop for the case where all the characters are ASCII. 
+ UChar ored = 0; + for (int i = 0; i < length; i++) { + UChar c = m_data[i]; + ored |= c; + data[i] = toASCIIUpper(c); + } + if (!(ored & ~0x7F)) + return adopt(data); + + // Do a slower implementation for cases that include non-ASCII characters. bool error; - int32_t length = Unicode::toUpper(0, 0, m_data, m_length, &error); - StringBuffer data(length); - Unicode::toUpper(data.characters(), length, m_data, m_length, &error); + int32_t realLength = Unicode::toUpper(data.characters(), length, m_data, m_length, &error); + if (!error && realLength == length) + return adopt(data); + data.resize(realLength); + Unicode::toUpper(data.characters(), realLength, m_data, m_length, &error); if (error) return this; return adopt(data); @@ -425,7 +305,7 @@ PassRefPtr<StringImpl> StringImpl::foldCase() if (!error && realLength == length) return adopt(data); data.resize(realLength); - Unicode::foldCase(data.characters(), length, m_data, m_length, &error); + Unicode::foldCase(data.characters(), realLength, m_data, m_length, &error); if (error) return this; return adopt(data); @@ -513,84 +393,54 @@ PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous) return adopt(data); } -int StringImpl::toInt(bool* ok) +int StringImpl::toIntStrict(bool* ok, int base) { - unsigned i = 0; + return charactersToIntStrict(m_data, m_length, ok, base); +} - // Allow leading spaces. - for (; i != m_length; ++i) - if (!isSpaceOrNewline(m_data[i])) - break; - - // Allow sign. - if (i != m_length && (m_data[i] == '+' || m_data[i] == '-')) - ++i; - - // Allow digits. 
- for (; i != m_length; ++i) - if (!Unicode::isDigit(m_data[i])) - break; - - return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt(ok); +unsigned StringImpl::toUIntStrict(bool* ok, int base) +{ + return charactersToUIntStrict(m_data, m_length, ok, base); } -int64_t StringImpl::toInt64(bool* ok) +int64_t StringImpl::toInt64Strict(bool* ok, int base) { - unsigned i = 0; + return charactersToInt64Strict(m_data, m_length, ok, base); +} - // Allow leading spaces. - for (; i != m_length; ++i) - if (!isSpaceOrNewline(m_data[i])) - break; - - // Allow sign. - if (i != m_length && (m_data[i] == '+' || m_data[i] == '-')) - ++i; - - // Allow digits. - for (; i != m_length; ++i) - if (!Unicode::isDigit(m_data[i])) - break; - - return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt64(ok); +uint64_t StringImpl::toUInt64Strict(bool* ok, int base) +{ + return charactersToUInt64Strict(m_data, m_length, ok, base); } -uint64_t StringImpl::toUInt64(bool* ok) +int StringImpl::toInt(bool* ok) { - unsigned i = 0; + return charactersToInt(m_data, m_length, ok); +} - // Allow leading spaces. - for (; i != m_length; ++i) - if (!isSpaceOrNewline(m_data[i])) - break; +unsigned StringImpl::toUInt(bool* ok) +{ + return charactersToUInt(m_data, m_length, ok); +} - // Allow digits. 
- for (; i != m_length; ++i) - if (!Unicode::isDigit(m_data[i])) - break; - - return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toUInt64(ok); +int64_t StringImpl::toInt64(bool* ok) +{ + return charactersToInt64(m_data, m_length, ok); +} + +uint64_t StringImpl::toUInt64(bool* ok) +{ + return charactersToUInt64(m_data, m_length, ok); } double StringImpl::toDouble(bool* ok) { - if (!m_length) { - if (ok) - *ok = false; - return 0; - } - char *end; - CString latin1String = Latin1Encoding().encode(characters(), length()); - double val = kjs_strtod(latin1String.data(), &end); - if (ok) - *ok = end == 0 || *end == '\0'; - return val; + return charactersToDouble(m_data, m_length, ok); } float StringImpl::toFloat(bool* ok) { - // FIXME: This will return ok even when the string fits into a double but not a float. - return narrowPrecisionToFloat(toDouble(ok)); + return charactersToFloat(m_data, m_length, ok); } static bool equal(const UChar* a, const char* b, int length) @@ -657,15 +507,7 @@ int StringImpl::find(const char* chs, int index, bool caseSensitive) int StringImpl::find(UChar c, int start) { - unsigned index = start; - if (index >= m_length ) - return -1; - while(index < m_length) { - if (m_data[index] == c) - return index; - index++; - } - return -1; + return WebCore::find(m_data, m_length, c, start); } int StringImpl::find(StringImpl* str, int index, bool caseSensitive) @@ -726,18 +568,7 @@ int StringImpl::find(StringImpl* str, int index, bool caseSensitive) int StringImpl::reverseFind(UChar c, int index) { - if (index >= (int)m_length || m_length == 0) - return -1; - - if (index < 0) - index += m_length; - while (1) { - if (m_data[index] == c) - return index; - if (index == 0) - return -1; - index--; - } + return WebCore::reverseFind(m_data, m_length, c, index); } int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) @@ -1032,20 +863,28 @@ PassRefPtr<StringImpl> 
StringImpl::createStrippingNullCharacters(const UChar* ch ASSERT(characters); ASSERT(length); - StringBuffer strippedCopy(length); + // Optimize for the case where there are no Null characters by quickly + // searching for nulls, and then using StringImpl::create, which will + // memcpy the whole buffer. This is faster than assigning character by + // character during the loop. + + // Fast case. int foundNull = 0; - for (unsigned i = 0; i < length; i++) { + for (unsigned i = 0; !foundNull && i < length; i++) { int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS) - strippedCopy[i] = c; - foundNull |= ~c; + foundNull |= !c; } if (!foundNull) - return adoptRef(new StringImpl(strippedCopy.release(), length, AdoptBuffer())); + return StringImpl::create(characters, length); + + // Slow case. + StringBuffer strippedCopy(length); unsigned strippedLength = 0; for (unsigned i = 0; i < length; i++) { if (int c = characters[i]) strippedCopy[strippedLength++] = c; } + ASSERT(strippedLength < length); // Only take the slow case when stripping. 
strippedCopy.shrink(strippedLength); return adopt(strippedCopy); } diff --git a/WebCore/platform/text/StringImpl.h b/WebCore/platform/text/StringImpl.h index dd50b2e..57f64c8 100644 --- a/WebCore/platform/text/StringImpl.h +++ b/WebCore/platform/text/StringImpl.h @@ -22,7 +22,6 @@ #ifndef StringImpl_h #define StringImpl_h -#include <kjs/identifier.h> #include <limits.h> #include <wtf/ASCIICType.h> #include <wtf/Forward.h> @@ -30,7 +29,7 @@ #include <wtf/Vector.h> #include <wtf/unicode/Unicode.h> -#if PLATFORM(CF) +#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) typedef const struct __CFString * CFStringRef; #endif @@ -44,11 +43,15 @@ class AtomicString; class StringBuffer; struct CStringTranslator; -struct Length; +struct HashAndCharactersTranslator; struct StringHash; struct UCharBufferTranslator; class StringImpl : public RefCounted<StringImpl> { + friend class AtomicString; + friend struct CStringTranslator; + friend struct HashAndCharactersTranslator; + friend struct UCharBufferTranslator; private: StringImpl(); StringImpl(const UChar*, unsigned length); @@ -83,6 +86,7 @@ public: bool hasTerminatingNullCharacter() { return m_hasTerminatingNullCharacter; } unsigned hash() { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; } + unsigned existingHash() const { ASSERT(m_hash); return m_hash; } static unsigned computeHash(const UChar*, unsigned len); static unsigned computeHash(const char*); @@ -95,18 +99,21 @@ public: UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; } UChar32 characterStartingAt(unsigned); - Length toLength(); - bool containsOnlyWhitespace(); - int toInt(bool* ok = 0); // ignores trailing garbage, unlike DeprecatedString - int64_t toInt64(bool* ok = 0); // ignores trailing garbage, unlike DeprecatedString - uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage, unlike DeprecatedString + int toIntStrict(bool* ok = 0, int base = 10); + unsigned toUIntStrict(bool* ok = 0, int base = 10); + 
int64_t toInt64Strict(bool* ok = 0, int base = 10); + uint64_t toUInt64Strict(bool* ok = 0, int base = 10); + + int toInt(bool* ok = 0); // ignores trailing garbage + unsigned toUInt(bool* ok = 0); // ignores trailing garbage + int64_t toInt64(bool* ok = 0); // ignores trailing garbage + uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage + double toDouble(bool* ok = 0); float toFloat(bool* ok = 0); - Length* toCoordsArray(int& len); - Length* toLengthArray(int& len); bool isLower(); PassRefPtr<StringImpl> lower(); PassRefPtr<StringImpl> upper(); @@ -138,7 +145,7 @@ public: WTF::Unicode::Direction defaultWritingDirection(); -#if PLATFORM(CF) +#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) CFStringRef createCFString(); #endif #ifdef __OBJC__ @@ -146,10 +153,6 @@ public: #endif private: - friend class AtomicString; - friend struct UCharBufferTranslator; - friend struct CStringTranslator; - unsigned m_length; const UChar* m_data; mutable unsigned m_hash; diff --git a/WebCore/platform/text/TextBreakIteratorICU.cpp b/WebCore/platform/text/TextBreakIteratorICU.cpp index 9fd2d0b..9941f58 100644 --- a/WebCore/platform/text/TextBreakIteratorICU.cpp +++ b/WebCore/platform/text/TextBreakIteratorICU.cpp @@ -25,6 +25,7 @@ #include "TextBreakIteratorInternalICU.h" #include <unicode/ubrk.h> +#include <wtf/Assertions.h> namespace WebCore { @@ -38,6 +39,7 @@ static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator UErrorCode openStatus = U_ZERO_ERROR; iterator = static_cast<TextBreakIterator*>(ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &openStatus)); createdIterator = true; + ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus); } if (!iterator) return 0; diff --git a/WebCore/platform/text/TextCodec.cpp b/WebCore/platform/text/TextCodec.cpp index 1985c49..4222ee1 100644 --- a/WebCore/platform/text/TextCodec.cpp +++ b/WebCore/platform/text/TextCodec.cpp @@ 
-28,29 +28,31 @@ #include "TextCodec.h" #include "PlatformString.h" +#include <wtf/StringExtras.h> namespace WebCore { -const UChar BOM = 0xFEFF; - TextCodec::~TextCodec() { } -// We strip BOM characters because they can show up both at the start of content -// and inside content, and we never want them to end up in the decoded text. -void TextCodec::appendOmittingBOM(Vector<UChar>& v, const UChar* characters, size_t length) +int TextCodec::getUnencodableReplacement(unsigned codePoint, UnencodableHandling handling, UnencodableReplacementArray replacement) { - size_t start = 0; - for (size_t i = 0; i != length; ++i) { - if (BOM == characters[i]) { - if (start != i) - v.append(&characters[start], i - start); - start = i + 1; - } + switch (handling) { + case QuestionMarksForUnencodables: + replacement[0] = '?'; + replacement[1] = 0; + return 1; + case EntitiesForUnencodables: + snprintf(replacement, sizeof(UnencodableReplacementArray), "&#%u;", codePoint); + return static_cast<int>(strlen(replacement)); + case URLEncodedEntitiesForUnencodables: + snprintf(replacement, sizeof(UnencodableReplacementArray), "%%26%%23%u%%3B", codePoint); + return static_cast<int>(strlen(replacement)); } - if (start != length) - v.append(&characters[start], length - start); + ASSERT_NOT_REACHED(); + replacement[0] = 0; + return 0; } } // namespace WebCore diff --git a/WebCore/platform/text/TextCodec.h b/WebCore/platform/text/TextCodec.h index 77ffcf4..0a56262 100644 --- a/WebCore/platform/text/TextCodec.h +++ b/WebCore/platform/text/TextCodec.h @@ -32,21 +32,46 @@ #include <wtf/Vector.h> #include <wtf/unicode/Unicode.h> -namespace WebCore { +#include "PlatformString.h" - class CString; - class String; +namespace WebCore { class TextEncoding; + // Specifies what will happen when a character is encountered that is + // not encodable in the character set. + enum UnencodableHandling { + // Substitutes the replacement character "?". 
+ QuestionMarksForUnencodables, + + // Encodes the character as an XML entity. For example, U+06DE + // would be "&#1758;" (0x6DE = 1758 in decimal). + EntitiesForUnencodables, + + // Encodes the character as an entity as above, but with escaped + // non-alphanumeric characters. This is used in URLs. + // For example, U+6DE would be "%26%231758%3B". + URLEncodedEntitiesForUnencodables, + }; + + typedef char UnencodableReplacementArray[32]; + class TextCodec : Noncopyable { public: virtual ~TextCodec(); - virtual String decode(const char*, size_t length, bool flush = false) = 0; - virtual CString encode(const UChar*, size_t length, bool allowEntities = false) = 0; + String decode(const char* str, size_t length, bool flush = false) + { + bool ignored; + return decode(str, length, flush, false, ignored); + } + + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0; + virtual CString encode(const UChar*, size_t length, UnencodableHandling) = 0; - protected: - static void appendOmittingBOM(Vector<UChar>&, const UChar*, size_t length); + // Fills a null-terminated string representation of the given + // unencodable character into the given replacement buffer. + // The length of the string (not including the null) will be returned. 
+ static int getUnencodableReplacement(unsigned codePoint, UnencodableHandling, UnencodableReplacementArray); }; typedef void (*EncodingNameRegistrar)(const char* alias, const char* name); diff --git a/WebCore/platform/text/TextCodecICU.cpp b/WebCore/platform/text/TextCodecICU.cpp index a89a74e..0a324a2 100644 --- a/WebCore/platform/text/TextCodecICU.cpp +++ b/WebCore/platform/text/TextCodecICU.cpp @@ -33,7 +33,7 @@ #include <unicode/ucnv.h> #include <unicode/ucnv_cb.h> #include <wtf/Assertions.h> -#include <wtf/HashMap.h> +#include <wtf/StringExtras.h> using std::auto_ptr; using std::min; @@ -41,7 +41,7 @@ using std::min; namespace WebCore { const size_t ConversionBufferSize = 16384; - + static UConverter* cachedConverterICU; static auto_ptr<TextCodec> newTextCodecICU(const TextEncoding& encoding, const void*) @@ -60,8 +60,7 @@ void TextCodecICU::registerBaseCodecs(TextCodecRegistrar registrar) } // FIXME: Registering all the encodings we get from ucnv_getAvailableName -// includes encodings we don't want or need. For example: UTF16_PlatformEndian, -// UTF16_OppositeEndian, UTF32_PlatformEndian, UTF32_OppositeEndian, and all +// includes encodings we don't want or need. For example, all // the encodings with commas and version numbers. void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar) @@ -69,27 +68,41 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar // We register Hebrew with logical ordering using a separate name. // Otherwise, this would share the same canonical name as the // visual ordering case, and then TextEncoding could not tell them - // apart; ICU works with either name. + // apart; ICU treats these names as synonyms. 
registrar("ISO-8859-8-I", "ISO-8859-8-I"); int32_t numEncodings = ucnv_countAvailable(); for (int32_t i = 0; i < numEncodings; ++i) { const char* name = ucnv_getAvailableName(i); UErrorCode error = U_ZERO_ERROR; - // FIXME: Should we use the "MIME" standard instead of "IANA"? - const char* standardName = ucnv_getStandardName(name, "IANA", &error); - if (!U_SUCCESS(error) || !standardName) - continue; + // Try MIME before trying IANA to pick up commonly used names like + // 'EUC-JP' instead of horrendously long names like + // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. + const char* standardName = ucnv_getStandardName(name, "MIME", &error); + if (!U_SUCCESS(error) || !standardName) { + error = U_ZERO_ERROR; + // Try IANA to pick up 'windows-12xx' and other names + // which are not preferred MIME names but are widely used. + standardName = ucnv_getStandardName(name, "IANA", &error); + if (!U_SUCCESS(error) || !standardName) + continue; + } // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers. // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding // for encoding GB_2312-80 and several others. So, we need to override this behavior, too. if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312-80") == 0) standardName = "GBK"; -#ifndef ANDROID - else -#endif - registrar(standardName, standardName); + // Similarly, EUC-KR encodings all map to an extended version. + else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0) + standardName = "windows-949-2000"; + // And so on. + else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6. 
+ standardName = "windows-1254"; + else if (strcmp(standardName, "TIS-620") == 0) + standardName = "windows-874-2000"; + + registrar(standardName, standardName); uint16_t numAliases = ucnv_countAliases(name, &error); ASSERT(U_SUCCESS(error)); @@ -104,21 +117,25 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar } // Additional aliases. - // Perhaps we can get these added to ICU. + // These are present in modern versions of ICU, but not in ICU 3.2 (shipped with Mac OS X 10.4). registrar("macroman", "macintosh"); - registrar("xmacroman", "macintosh"); +#ifndef ANDROID // Android does not have x-mac-cyrillic in its ICU library + registrar("maccyrillic", "x-mac-cyrillic"); +#endif // Additional aliases that historically were present in the encoding // table in WebKit on Macintosh that don't seem to be present in ICU. // Perhaps we can prove these are not used on the web and remove them. // Or perhaps we can get them added to ICU. + registrar("xmacroman", "macintosh"); +#ifndef ANDROID // Android does not have x-mac-cyrillic in its ICU library + registrar("xmacukrainian", "x-mac-cyrillic"); +#endif registrar("cnbig5", "Big5"); registrar("cngb", "EUC-CN"); registrar("csISO88598I", "ISO_8859-8-I"); registrar("csgb231280", "EUC-CN"); - registrar("dos720", "cp864"); registrar("dos874", "cp874"); - registrar("jis7", "ISO-2022-JP"); registrar("koi", "KOI8-R"); registrar("logical", "ISO-8859-8-I"); registrar("unicode11utf8", "UTF-8"); @@ -127,7 +144,7 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar registrar("winarabic", "windows-1256"); registrar("winbaltic", "windows-1257"); registrar("wincyrillic", "windows-1251"); - registrar("windows874", "cp874"); + registrar("iso885911", "windows874-2000"); registrar("wingreek", "windows-1253"); registrar("winhebrew", "windows-1255"); registrar("winlatin2", "windows-1250"); @@ -139,7 +156,17 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar 
registrar registrar("xeuccn", "EUC-CN"); registrar("xgbk", "EUC-CN"); registrar("xunicode20utf8", "UTF-8"); + registrar("xwindows949", "windows-949-2000"); registrar("xxbig5", "Big5"); + + // This alias is present in modern versions of ICU, but it has no standard name, + // so we give one to it manually. It is not present in ICU 3.2. + registrar("windows874", "windows874-2000"); + + // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names. + // They are not present in ICU 3.2. + registrar("dos720", "cp864"); + registrar("jis7", "ISO-2022-JP"); } void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar) @@ -151,10 +178,13 @@ void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar) for (int32_t i = 0; i < numEncodings; ++i) { const char* name = ucnv_getAvailableName(i); UErrorCode error = U_ZERO_ERROR; - // FIXME: Should we use the "MIME" standard instead of "IANA"? - const char* standardName = ucnv_getStandardName(name, "IANA", &error); - if (!U_SUCCESS(error) || !standardName) - continue; + const char* standardName = ucnv_getStandardName(name, "MIME", &error); + if (!U_SUCCESS(error) || !standardName) { + error = U_ZERO_ERROR; + standardName = ucnv_getStandardName(name, "IANA", &error); + if (!U_SUCCESS(error) || !standardName) + continue; + } registrar(standardName, newTextCodecICU, 0); } } @@ -211,7 +241,50 @@ void TextCodecICU::createICUConverter() const ucnv_setFallback(m_converterICU, TRUE); } -String TextCodecICU::decode(const char* bytes, size_t length, bool flush) +int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err) +{ + UChar* targetStart = target; + err = U_ZERO_ERROR; + ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err); + return target - targetStart; +} + +class ErrorCallbackSetter { +public: + 
ErrorCallbackSetter(UConverter* converter, bool stopOnError) + : m_converter(converter) + , m_shouldStopOnEncodingErrors(stopOnError) + { + if (m_shouldStopOnEncodingErrors) { + UErrorCode err = U_ZERO_ERROR; + ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE, + UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction, + &m_savedContext, &err); + ASSERT(err == U_ZERO_ERROR); + } + } + ~ErrorCallbackSetter() + { + if (m_shouldStopOnEncodingErrors) { + UErrorCode err = U_ZERO_ERROR; + const void* oldContext; + UConverterToUCallback oldAction; + ucnv_setToUCallBack(m_converter, m_savedAction, + m_savedContext, &oldAction, + &oldContext, &err); + ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE); + ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_ON_ILLEGAL)); + ASSERT(err == U_ZERO_ERROR); + } + } +private: + UConverter* m_converter; + bool m_shouldStopOnEncodingErrors; + const void* m_savedContext; + UConverterToUCallback m_savedAction; +}; + +String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) { // Get a converter for the passed-in encoding. 
if (!m_converterICU) { @@ -222,34 +295,29 @@ String TextCodecICU::decode(const char* bytes, size_t length, bool flush) return String(); } } + + ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError); Vector<UChar> result; UChar buffer[ConversionBufferSize]; + UChar* bufferLimit = buffer + ConversionBufferSize; const char* source = reinterpret_cast<const char*>(bytes); const char* sourceLimit = source + length; int32_t* offsets = NULL; - UErrorCode err; + UErrorCode err = U_ZERO_ERROR; do { - UChar* target = buffer; - const UChar* targetLimit = target + ConversionBufferSize; - err = U_ZERO_ERROR; - ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err); - int count = target - buffer; - appendOmittingBOM(result, reinterpret_cast<const UChar*>(buffer), count); + int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, flush, err); + result.append(buffer, ucharsDecoded); } while (err == U_BUFFER_OVERFLOW_ERROR); if (U_FAILURE(err)) { // flush the converter so it can be reused, and not be bothered by this error. 
do { - UChar *target = buffer; - const UChar *targetLimit = target + ConversionBufferSize; - err = U_ZERO_ERROR; - ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, true, &err); + decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err); } while (source < sourceLimit); - LOG_ERROR("ICU conversion error"); - return String(); + sawError = true; } String resultString = String::adopt(result); @@ -265,23 +333,43 @@ String TextCodecICU::decode(const char* bytes, size_t length, bool flush) // We need to apply these fallbacks ourselves as they are not currently supported by ICU and // they were provided by the old TEC encoding path // Needed to fix <rdar://problem/4708689> -static HashMap<UChar32, UChar>& gbkEscapes() { - static HashMap<UChar32, UChar> escapes; - if (escapes.isEmpty()) { - escapes.add(0x01F9, 0xE7C8); - escapes.add(0x1E3F, 0xE7C7); - escapes.add(0x22EF, 0x2026); - escapes.add(0x301C, 0xFF5E); +static UChar getGbkEscape(UChar32 codePoint) +{ + switch (codePoint) { + case 0x01F9: + return 0xE7C8; + case 0x1E3F: + return 0xE7C7; + case 0x22EF: + return 0x2026; + case 0x301C: + return 0xFF5E; + default: + return 0; } - - return escapes; } +// Invalid character handler when writing escaped entities for unrepresentable +// characters. See the declaration of TextCodec::encode for more. 
+static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, + UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) +{ + if (reason == UCNV_UNASSIGNED) { + *err = U_ZERO_ERROR; + + UnencodableReplacementArray entity; + int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncodedEntitiesForUnencodables, entity); + ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err); + } else + UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err); +} + +// Substitutes special GBK characters, escaping all other unassigned entities. static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) { - if (codePoint && gbkEscapes().contains(codePoint)) { - UChar outChar = gbkEscapes().get(codePoint); + UChar outChar; + if (reason == UCNV_UNASSIGNED && (outChar = getGbkEscape(codePoint))) { const UChar* source = &outChar; *err = U_ZERO_ERROR; ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); @@ -290,11 +378,28 @@ static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err); } +// Combines both urlEscapedEntityCallback and GBK character substitution. 
+static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, + UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) +{ + if (reason == UCNV_UNASSIGNED) { + if (UChar outChar = getGbkEscape(codePoint)) { + const UChar* source = &outChar; + *err = U_ZERO_ERROR; + ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); + return; + } + urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, err); + return; + } + UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err); +} + static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) { - if (gbkEscapes().contains(codePoint)) { - UChar outChar = gbkEscapes().get(codePoint); + UChar outChar; + if (reason == UCNV_UNASSIGNED && (outChar = getGbkEscape(codePoint))) { const UChar* source = &outChar; *err = U_ZERO_ERROR; ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); @@ -303,7 +408,7 @@ static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err); } -CString TextCodecICU::encode(const UChar* characters, size_t length, bool allowEntities) +CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling) { if (!length) return ""; @@ -321,14 +426,20 @@ CString TextCodecICU::encode(const UChar* characters, size_t length, bool allowE const UChar* source = copy.characters(); const UChar* sourceLimit = source + copy.length(); - + UErrorCode err = U_ZERO_ERROR; - if (allowEntities) - ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? 
gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err); - else { - ucnv_setSubstChars(m_converterICU, "?", 1, &err); - ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err); + switch (handling) { + case QuestionMarksForUnencodables: + ucnv_setSubstChars(m_converterICU, "?", 1, &err); + ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err); + break; + case EntitiesForUnencodables: + ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err); + break; + case URLEncodedEntitiesForUnencodables: + ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err); + break; } ASSERT(U_SUCCESS(err)); diff --git a/WebCore/platform/text/TextCodecICU.h b/WebCore/platform/text/TextCodecICU.h index c2a30b1..9c9a4a7b 100644 --- a/WebCore/platform/text/TextCodecICU.h +++ b/WebCore/platform/text/TextCodecICU.h @@ -45,18 +45,21 @@ namespace WebCore { TextCodecICU(const TextEncoding&); virtual ~TextCodecICU(); - virtual String decode(const char*, size_t length, bool flush = false); - virtual CString encode(const UChar*, size_t length, bool allowEntities = false); + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); private: void createICUConverter() const; void releaseICUConverter() const; bool needsGBKFallbacks() const { return m_needsGBKFallbacks; } - void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; } + void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; } + + int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source, + const char* sourceLimit, 
int32_t* offsets, bool flush, UErrorCode& err); TextEncoding m_encoding; unsigned m_numBufferedBytes; - unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character + unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character mutable UConverter* m_converterICU; mutable bool m_needsGBKFallbacks; }; diff --git a/WebCore/platform/text/TextCodecLatin1.cpp b/WebCore/platform/text/TextCodecLatin1.cpp index a687235..50f9f97 100644 --- a/WebCore/platform/text/TextCodecLatin1.cpp +++ b/WebCore/platform/text/TextCodecLatin1.cpp @@ -29,6 +29,7 @@ #include "CString.h" #include "PlatformString.h" #include "StringBuffer.h" +#include <stdio.h> using std::auto_ptr; @@ -117,7 +118,7 @@ void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar) registrar("US-ASCII", newStreamingTextDecoderWindowsLatin1, 0); } -String TextCodecLatin1::decode(const char* bytes, size_t length, bool) +String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&) { StringBuffer characters(length); @@ -141,7 +142,7 @@ String TextCodecLatin1::decode(const char* bytes, size_t length, bool) return String::adopt(characters); } -static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, bool allowEntities) +static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, UnencodableHandling handling) { Vector<char> result(length); char* bytes = result.data(); @@ -158,17 +159,13 @@ static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length if (table[b] == c) goto gotByte; // No way to encode this character with Windows Latin-1. 
- if (allowEntities) { - char entityBuffer[16]; - sprintf(entityBuffer, "&#%u;", c); - size_t entityLength = strlen(entityBuffer); - result.grow(resultLength + entityLength + length - i); - bytes = result.data(); - memcpy(bytes + resultLength, entityBuffer, entityLength); - resultLength += entityLength; - continue; - } - b = '?'; + UnencodableReplacementArray replacement; + int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement); + result.grow(resultLength + replacementLength + length - i); + bytes = result.data(); + memcpy(bytes + resultLength, replacement, replacementLength); + resultLength += replacementLength; + continue; } gotByte: bytes[resultLength++] = b; @@ -177,7 +174,7 @@ static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length return CString(bytes, resultLength); } -CString TextCodecLatin1::encode(const UChar* characters, size_t length, bool allowEntities) +CString TextCodecLatin1::encode(const UChar* characters, size_t length, UnencodableHandling handling) { { char* bytes; @@ -196,7 +193,7 @@ CString TextCodecLatin1::encode(const UChar* characters, size_t length, bool all } // If it wasn't all ASCII, call the function that handles more-complex cases. 
- return encodeComplexWindowsLatin1(characters, length, allowEntities); + return encodeComplexWindowsLatin1(characters, length, handling); } } // namespace WebCore diff --git a/WebCore/platform/text/TextCodecLatin1.h b/WebCore/platform/text/TextCodecLatin1.h index 46d6e66..f035d01 100644 --- a/WebCore/platform/text/TextCodecLatin1.h +++ b/WebCore/platform/text/TextCodecLatin1.h @@ -35,8 +35,8 @@ namespace WebCore { static void registerEncodingNames(EncodingNameRegistrar); static void registerCodecs(TextCodecRegistrar); - virtual String decode(const char*, size_t length, bool flush = false); - virtual CString encode(const UChar*, size_t length, bool allowEntities = false); + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); }; } // namespace WebCore diff --git a/WebCore/platform/text/TextCodecUTF16.cpp b/WebCore/platform/text/TextCodecUTF16.cpp index 9ecd2a9..88e4e73 100644 --- a/WebCore/platform/text/TextCodecUTF16.cpp +++ b/WebCore/platform/text/TextCodecUTF16.cpp @@ -34,8 +34,6 @@ using std::auto_ptr; namespace WebCore { -const UChar BOM = 0xFEFF; - void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar) { registrar("UTF-16LE", "UTF-16LE"); @@ -67,7 +65,7 @@ void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar) registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0); } -String TextCodecUTF16::decode(const char* bytes, size_t length, bool) +String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool stopOnError, bool& sawError) { if (!length) return String(); @@ -85,8 +83,7 @@ String TextCodecUTF16::decode(const char* bytes, size_t length, bool) c = m_bufferedByte | (p[0] << 8); else c = (m_bufferedByte << 8) | p[0]; - if (c != BOM) - *q++ = c; + *q++ = c; m_haveBufferedByte = false; p += 1; numChars -= 1; @@ -96,15 +93,13 @@ String TextCodecUTF16::decode(const char* bytes, size_t length, bool) 
for (size_t i = 0; i < numChars; ++i) { UChar c = p[0] | (p[1] << 8); p += 2; - if (c != BOM) - *q++ = c; + *q++ = c; } else for (size_t i = 0; i < numChars; ++i) { UChar c = (p[0] << 8) | p[1]; p += 2; - if (c != BOM) - *q++ = c; + *q++ = c; } if (numBytes & 1) { @@ -118,7 +113,7 @@ String TextCodecUTF16::decode(const char* bytes, size_t length, bool) return String::adopt(buffer); } -CString TextCodecUTF16::encode(const UChar* characters, size_t length, bool) +CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling) { char* bytes; CString string = CString::newUninitialized(length * 2, bytes); diff --git a/WebCore/platform/text/TextCodecUTF16.h b/WebCore/platform/text/TextCodecUTF16.h index 2bde221..8ce9476 100644 --- a/WebCore/platform/text/TextCodecUTF16.h +++ b/WebCore/platform/text/TextCodecUTF16.h @@ -37,8 +37,8 @@ namespace WebCore { TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { } - virtual String decode(const char*, size_t length, bool flush = false); - virtual CString encode(const UChar*, size_t length, bool allowEntities = false); + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); private: bool m_littleEndian; diff --git a/WebCore/platform/text/TextCodecUserDefined.cpp b/WebCore/platform/text/TextCodecUserDefined.cpp index a420992..2dae0f3 100644 --- a/WebCore/platform/text/TextCodecUserDefined.cpp +++ b/WebCore/platform/text/TextCodecUserDefined.cpp @@ -29,6 +29,7 @@ #include "CString.h" #include "PlatformString.h" #include "StringBuffer.h" +#include <stdio.h> using std::auto_ptr; @@ -49,7 +50,7 @@ void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar) registrar("x-user-defined", newStreamingTextDecoderUserDefined, 0); } -String TextCodecUserDefined::decode(const char* bytes, size_t length, bool) +String 
TextCodecUserDefined::decode(const char* bytes, size_t length, bool, bool, bool&) { StringBuffer buffer(length); @@ -61,7 +62,7 @@ String TextCodecUserDefined::decode(const char* bytes, size_t length, bool) return String::adopt(buffer); } -static CString encodeComplexUserDefined(const UChar* characters, size_t length, bool allowEntities) +static CString encodeComplexUserDefined(const UChar* characters, size_t length, UnencodableHandling handling) { Vector<char> result(length); char* bytes = result.data(); @@ -71,27 +72,23 @@ static CString encodeComplexUserDefined(const UChar* characters, size_t length, UChar32 c; U16_NEXT(characters, i, length, c); signed char signedByte = c; - if ((signedByte & 0xf7ff) == c) + if ((signedByte & 0xF7FF) == c) bytes[resultLength++] = signedByte; else { // No way to encode this character with x-user-defined. - if (allowEntities) { - char entityBuffer[16]; - sprintf(entityBuffer, "&#%u;", c); - size_t entityLength = strlen(entityBuffer); - result.grow(resultLength + entityLength + length - i); - bytes = result.data(); - memcpy(bytes + resultLength, entityBuffer, entityLength); - resultLength += entityLength; - } else - bytes[resultLength++] = '?'; + UnencodableReplacementArray replacement; + int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement); + result.grow(resultLength + replacementLength + length - i); + bytes = result.data(); + memcpy(bytes + resultLength, replacement, replacementLength); + resultLength += replacementLength; } } return CString(bytes, resultLength); } -CString TextCodecUserDefined::encode(const UChar* characters, size_t length, bool allowEntities) +CString TextCodecUserDefined::encode(const UChar* characters, size_t length, UnencodableHandling handling) { char* bytes; CString string = CString::newUninitialized(length, bytes); @@ -108,7 +105,7 @@ CString TextCodecUserDefined::encode(const UChar* characters, size_t length, boo return string; // If it wasn't all ASCII, call the 
function that handles more-complex cases. - return encodeComplexUserDefined(characters, length, allowEntities); + return encodeComplexUserDefined(characters, length, handling); } } // namespace WebCore diff --git a/WebCore/platform/text/TextCodecUserDefined.h b/WebCore/platform/text/TextCodecUserDefined.h index 4fba907..d1b3160 100644 --- a/WebCore/platform/text/TextCodecUserDefined.h +++ b/WebCore/platform/text/TextCodecUserDefined.h @@ -35,8 +35,8 @@ namespace WebCore { static void registerEncodingNames(EncodingNameRegistrar); static void registerCodecs(TextCodecRegistrar); - virtual String decode(const char*, size_t length, bool flush = false); - virtual CString encode(const UChar*, size_t length, bool allowEntities = false); + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); }; } // namespace WebCore diff --git a/WebCore/platform/text/TextDecoder.cpp b/WebCore/platform/text/TextDecoder.cpp index 8633e9f..e39a6b7 100644 --- a/WebCore/platform/text/TextDecoder.cpp +++ b/WebCore/platform/text/TextDecoder.cpp @@ -47,8 +47,10 @@ void TextDecoder::reset(const TextEncoding& encoding) m_numBufferedBytes = 0; } -String TextDecoder::checkForBOM(const char* data, size_t length, bool flush) +String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError) { + ASSERT(!m_checkedForBOM); + // Check to see if we found a BOM. 
size_t numBufferedBytes = m_numBufferedBytes; size_t buf1Len = numBufferedBytes; @@ -62,22 +64,28 @@ String TextDecoder::checkForBOM(const char* data, size_t length, bool flush) const TextEncoding* encodingConsideringBOM = &m_encoding; bool foundBOM = true; + size_t lengthOfBOM = 0; if (c1 == 0xFF && c2 == 0xFE) { - if (c3 != 0 || c4 != 0) + if (c3 != 0 || c4 != 0) { encodingConsideringBOM = &UTF16LittleEndianEncoding(); - else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) + lengthOfBOM = 2; + } else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) { encodingConsideringBOM = &UTF32LittleEndianEncoding(); - else + lengthOfBOM = 4; + } else foundBOM = false; - } - else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) + } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { encodingConsideringBOM = &UTF8Encoding(); - else if (c1 == 0xFE && c2 == 0xFF) + lengthOfBOM = 3; + } else if (c1 == 0xFE && c2 == 0xFF) { encodingConsideringBOM = &UTF16BigEndianEncoding(); - else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) + lengthOfBOM = 2; + } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) { encodingConsideringBOM = &UTF32BigEndianEncoding(); - else + lengthOfBOM = 4; + } else foundBOM = false; + if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) { // Continue to look for the BOM. memcpy(&m_bufferedBytes[numBufferedBytes], data, length); @@ -91,16 +99,31 @@ String TextDecoder::checkForBOM(const char* data, size_t length, bool flush) return String(); m_checkedForBOM = true; + // Skip the BOM. + if (foundBOM) { + ASSERT(numBufferedBytes < lengthOfBOM); + size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes; + ASSERT(numUnbufferedBOMBytes <= length); + + data += numUnbufferedBOMBytes; + length -= numUnbufferedBOMBytes; + numBufferedBytes = 0; + m_numBufferedBytes = 0; + } + // Handle case where we have some buffered bytes to deal with. 
if (numBufferedBytes) { char bufferedBytes[sizeof(m_bufferedBytes)]; memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes); m_numBufferedBytes = 0; - return m_codec->decode(bufferedBytes, numBufferedBytes, false) - + m_codec->decode(data, length, flush); + + String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError); + if (stopOnError && sawError) + return bufferedResult; + return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError); } - return m_codec->decode(data, length, flush); + return m_codec->decode(data, length, flush, stopOnError, sawError); } } // namespace WebCore diff --git a/WebCore/platform/text/TextDecoder.h b/WebCore/platform/text/TextDecoder.h index 3892032..171cb59 100644 --- a/WebCore/platform/text/TextDecoder.h +++ b/WebCore/platform/text/TextDecoder.h @@ -41,15 +41,15 @@ namespace WebCore { void reset(const TextEncoding&); const TextEncoding& encoding() const { return m_encoding; }; - String decode(const char* data, size_t length, bool flush = false) + String decode(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError) { if (!m_checkedForBOM) - return checkForBOM(data, length, flush); - return m_codec->decode(data, length, flush); + return checkForBOM(data, length, flush, stopOnError, sawError); + return m_codec->decode(data, length, flush, stopOnError, sawError); } private: - String checkForBOM(const char*, size_t length, bool flush); + String checkForBOM(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); TextEncoding m_encoding; OwnPtr<TextCodec> m_codec; diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp index c7676e9..9026049 100644 --- a/WebCore/platform/text/TextEncoding.cpp +++ b/WebCore/platform/text/TextEncoding.cpp @@ -59,15 +59,15 @@ TextEncoding::TextEncoding(const String& name) { } -String TextEncoding::decode(const char* data, size_t length) const +String 
TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const { if (!m_name) return String(); - return TextDecoder(*this).decode(data, length, true); + return TextDecoder(*this).decode(data, length, true, stopOnError, sawError); } -CString TextEncoding::encode(const UChar* characters, size_t length, bool allowEntities) const +CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const { if (!m_name) return CString(); @@ -100,11 +100,11 @@ CString TextEncoding::encode(const UChar* characters, size_t length, bool allowE source = normalizedCharacters.data(); sourceLength = normalizedLength; } - return newTextCodec(*this)->encode(source, sourceLength, allowEntities); + return newTextCodec(*this)->encode(source, sourceLength, handling); #elif USE(QT4_UNICODE) QString str(reinterpret_cast<const QChar*>(characters), length); str = str.normalized(QString::NormalizationForm_C); - return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), allowEntities); + return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling); #endif } diff --git a/WebCore/platform/text/TextEncoding.h b/WebCore/platform/text/TextEncoding.h index 59d225c..0a0ab8c 100644 --- a/WebCore/platform/text/TextEncoding.h +++ b/WebCore/platform/text/TextEncoding.h @@ -26,6 +26,7 @@ #ifndef TextEncoding_h #define TextEncoding_h +#include "TextCodec.h" #include <wtf/unicode/Unicode.h> namespace WebCore { @@ -46,8 +47,13 @@ namespace WebCore { UChar backslashAsCurrencySymbol() const; const TextEncoding& closest8BitEquivalent() const; - String decode(const char*, size_t length) const; - CString encode(const UChar*, size_t length, bool allowEntities = false) const; + String decode(const char* str, size_t length) const + { + bool ignored; + return decode(str, length, false, ignored); + } + String decode(const char*, size_t length, bool stopOnError, bool& sawError) 
const; + CString encode(const UChar*, size_t length, UnencodableHandling) const; private: const char* m_name; diff --git a/WebCore/platform/text/TextEncodingRegistry.cpp b/WebCore/platform/text/TextEncodingRegistry.cpp index a7ad879..3f1f078 100644 --- a/WebCore/platform/text/TextEncodingRegistry.cpp +++ b/WebCore/platform/text/TextEncodingRegistry.cpp @@ -33,6 +33,7 @@ #include <wtf/ASCIICType.h> #include <wtf/Assertions.h> #include <wtf/HashMap.h> +#include <wtf/StringExtras.h> #if USE(ICU_UNICODE) #include "TextCodecICU.h" @@ -132,7 +133,7 @@ static void checkExistingName(const char* alias, const char* atomicName) // Keep the warning silent about one case where we know this will happen. if (strcmp(alias, "ISO-8859-8-I") == 0 && strcmp(oldAtomicName, "ISO-8859-8-I") == 0 - && strcmp(atomicName, "ISO_8859-8:1988") == 0) + && strcasecmp(atomicName, "iso-8859-8") == 0) return; LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, oldAtomicName, atomicName); diff --git a/WebCore/platform/text/TextStream.cpp b/WebCore/platform/text/TextStream.cpp index b23e769..5b7a0c7 100644 --- a/WebCore/platform/text/TextStream.cpp +++ b/WebCore/platform/text/TextStream.cpp @@ -26,145 +26,89 @@ #include "config.h" #include "TextStream.h" -#include "DeprecatedString.h" -#include "Logging.h" #include "PlatformString.h" -#include <wtf/Vector.h> +#include <wtf/StringExtras.h> namespace WebCore { -const size_t integerOrPointerAsStringBufferSize = 100; // large enough for any integer or pointer in string format, including trailing null character -const char* const precisionFormats[7] = { "%.0f", "%.1f", "%.2f", "%.3f", "%.4f", "%.5f", "%.6f"}; -const int maxPrecision = 6; // must match size of precisionFormats -const int defaultPrecision = 6; // matches qt and sprintf(.., "%f", ...) 
behaviour - -TextStream::TextStream(DeprecatedString* s) - : m_hasByteArray(false), m_string(s), m_precision(defaultPrecision) -{ -} - -TextStream& TextStream::operator<<(char c) -{ - if (m_hasByteArray) - m_byteArray.append(c); - - if (m_string) - m_string->append(DeprecatedChar(c)); - return *this; -} - -TextStream& TextStream::operator<<(short i) -{ - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, "%d", i); - return *this << buffer; -} - -TextStream& TextStream::operator<<(unsigned short i) -{ - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, "%u", i); - return *this << buffer; -} +static const size_t printBufferSize = 100; // large enough for any integer or floating point value in string format, including trailing null character TextStream& TextStream::operator<<(int i) { - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, "%d", i); + char buffer[printBufferSize]; + snprintf(buffer, sizeof(buffer) - 1, "%d", i); return *this << buffer; } TextStream& TextStream::operator<<(unsigned i) { - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, "%u", i); + char buffer[printBufferSize]; + snprintf(buffer, sizeof(buffer) - 1, "%u", i); return *this << buffer; } TextStream& TextStream::operator<<(long i) { - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, "%ld", i); + char buffer[printBufferSize]; + snprintf(buffer, sizeof(buffer) - 1, "%ld", i); return *this << buffer; } TextStream& TextStream::operator<<(unsigned long i) { - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, "%lu", i); + char buffer[printBufferSize]; + snprintf(buffer, sizeof(buffer) - 1, "%lu", i); return *this << buffer; } TextStream& TextStream::operator<<(float f) { - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, precisionFormats[m_precision], f); + char buffer[printBufferSize]; + snprintf(buffer, sizeof(buffer) - 1, "%.2f", f); return *this << buffer; } TextStream& 
TextStream::operator<<(double d) { - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, precisionFormats[m_precision], d); + char buffer[printBufferSize]; + snprintf(buffer, sizeof(buffer) - 1, "%.2f", d); return *this << buffer; } -TextStream& TextStream::operator<<(const char* s) +TextStream& TextStream::operator<<(const char* string) { - if (m_hasByteArray) { - unsigned length = strlen(s); - unsigned oldSize = m_byteArray.size(); - m_byteArray.grow(oldSize + length); - memcpy(m_byteArray.data() + oldSize, s, length); - } - if (m_string) - m_string->append(s); + size_t stringLength = strlen(string); + size_t textLength = m_text.size(); + m_text.grow(textLength + stringLength); + for (size_t i = 0; i < stringLength; ++i) + m_text[textLength + i] = string[i]; return *this; } -TextStream& TextStream::operator<<(const DeprecatedString& s) +TextStream& TextStream::operator<<(const String& string) { - if (m_hasByteArray) { - unsigned length = s.length(); - unsigned oldSize = m_byteArray.size(); - m_byteArray.grow(oldSize + length); - memcpy(m_byteArray.data() + oldSize, s.latin1(), length); - } - if (m_string) - m_string->append(s); + append(m_text, string); return *this; } -TextStream& TextStream::operator<<(const String& s) +String TextStream::release() { - return (*this) << s.deprecatedString(); + return String::adopt(m_text); } -TextStream& TextStream::operator<<(void* p) +#if PLATFORM(WIN_OS) && PLATFORM(X86_64) && COMPILER(MSVC) +TextStream& TextStream::operator<<(__int64 i) { - char buffer[integerOrPointerAsStringBufferSize]; - sprintf(buffer, "%p", p); + char buffer[printBufferSize]; + snprintf(buffer, sizeof(buffer) - 1, "%I64i", i); return *this << buffer; } - -TextStream& TextStream::operator<<(const TextStreamManipulator& m) -{ - return m(*this); -} - -int TextStream::precision(int p) -{ - int oldPrecision = m_precision; - - if (p >= 0 && p <= maxPrecision) - m_precision = p; - - return oldPrecision; -} - -TextStream &endl(TextStream& 
stream) +TextStream& TextStream::operator<<(unsigned __int64 i) { - return stream << '\n'; + char buffer[printBufferSize]; + snprintf(buffer, sizeof(buffer) - 1, "%I64u", i); + return *this << buffer; } +#endif } diff --git a/WebCore/platform/text/TextStream.h b/WebCore/platform/text/TextStream.h index 897c267..6fb3f4b 100644 --- a/WebCore/platform/text/TextStream.h +++ b/WebCore/platform/text/TextStream.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2004, 2008 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -27,26 +27,14 @@ #define TextStream_h #include <wtf/Vector.h> +#include <wtf/unicode/Unicode.h> namespace WebCore { -class DeprecatedChar; -class DeprecatedString; class String; -class TextStream; - -typedef TextStream& (*TextStreamManipulator)(TextStream&); - -TextStream& endl(TextStream&); class TextStream { public: - TextStream(DeprecatedString*); - - TextStream& operator<<(char); - TextStream& operator<<(const DeprecatedChar&); - TextStream& operator<<(short); - TextStream& operator<<(unsigned short); TextStream& operator<<(int); TextStream& operator<<(unsigned); TextStream& operator<<(long); @@ -55,21 +43,15 @@ public: TextStream& operator<<(double); TextStream& operator<<(const char*); TextStream& operator<<(const String&); - TextStream& operator<<(const DeprecatedString&); - TextStream& operator<<(void*); - - TextStream& operator<<(const TextStreamManipulator&); +#if PLATFORM(WIN_OS) && PLATFORM(X86_64) && COMPILER(MSVC) + TextStream& operator<<(unsigned __int64); + TextStream& operator<<(__int64); +#endif - int precision(int); + String release(); private: - TextStream(const TextStream&); - TextStream& operator=(const TextStream&); - - bool m_hasByteArray; - Vector<char> m_byteArray; - DeprecatedString* m_string; - int m_precision; + Vector<UChar> m_text; }; } diff --git 
a/WebCore/platform/text/cf/StringCF.cpp b/WebCore/platform/text/cf/StringCF.cpp index 9e0d5f2..5e12ba9 100644 --- a/WebCore/platform/text/cf/StringCF.cpp +++ b/WebCore/platform/text/cf/StringCF.cpp @@ -21,7 +21,7 @@ #include "config.h" #include "PlatformString.h" -#if PLATFORM(CF) +#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) #include <CoreFoundation/CoreFoundation.h> @@ -52,4 +52,4 @@ CFStringRef String::createCFString() const } -#endif // PLATFORM(CF) +#endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) diff --git a/WebCore/platform/text/cf/StringImplCF.cpp b/WebCore/platform/text/cf/StringImplCF.cpp index 21b43df..ff595a5 100644 --- a/WebCore/platform/text/cf/StringImplCF.cpp +++ b/WebCore/platform/text/cf/StringImplCF.cpp @@ -21,7 +21,7 @@ #include "config.h" #include "StringImpl.h" -#if PLATFORM(CF) +#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) #include <CoreFoundation/CoreFoundation.h> @@ -34,4 +34,4 @@ CFStringRef StringImpl::createCFString() } -#endif // PLATFORM(CF) +#endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) diff --git a/WebCore/platform/text/mac/ShapeArabic.c b/WebCore/platform/text/mac/ShapeArabic.c index 4706e7c..6dbc008 100644 --- a/WebCore/platform/text/mac/ShapeArabic.c +++ b/WebCore/platform/text/mac/ShapeArabic.c @@ -2,8 +2,28 @@ ****************************************************************************** * * Copyright (C) 2000-2004, International Business Machines -* Corporation and others. All Rights Reserved. -* Copyright (C) 2007 Apple Inc. All rights reserved. +* Corporation and others. All Rights Reserved. +* Copyright (C) 2007 Apple Inc. All rights reserved. 
+* +* Permission is hereby granted, free of charge, to any person obtaining a copy of this +* software and associated documentation files (the "Software"), to deal in the Software +* without restriction, including without limitation the rights to use, copy, modify, +* merge, publish, distribute, and/or sell copies of the Software, and to permit persons +* to whom the Software is furnished to do so, provided that the above copyright notice(s) +* and this permission notice appear in all copies of the Software and that both the above +* copyright notice(s) and this permission notice appear in supporting documentation. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +* PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER +* OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR +* CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING +* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +* +* Except as contained in this notice, the name of a copyright holder shall not be used in +* advertising or otherwise to promote the sale, use or other dealings in this Software +* without prior written authorization of the copyright holder. 
* ****************************************************************************** * @@ -11,6 +31,9 @@ */ #include "config.h" + +#if USE(ATSUI) + #include "ShapeArabic.h" #include <unicode/utypes.h> @@ -528,3 +551,5 @@ int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int3 return sourceLength; } + +#endif // USE(ATSUI) diff --git a/WebCore/platform/text/mac/ShapeArabic.h b/WebCore/platform/text/mac/ShapeArabic.h index 2f85ea0..8aa577d 100644 --- a/WebCore/platform/text/mac/ShapeArabic.h +++ b/WebCore/platform/text/mac/ShapeArabic.h @@ -26,6 +26,8 @@ #ifndef ShapeArabic_h #define ShapeArabic_h +#if USE(ATSUI) + #include <unicode/ushape.h> #ifdef __cplusplus @@ -38,4 +40,5 @@ int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int3 } #endif +#endif // USE(ATSUI) #endif // ShapeArabic_h diff --git a/WebCore/platform/text/mac/TextCodecMac.cpp b/WebCore/platform/text/mac/TextCodecMac.cpp index 7270a26..ac1f0fb 100644 --- a/WebCore/platform/text/mac/TextCodecMac.cpp +++ b/WebCore/platform/text/mac/TextCodecMac.cpp @@ -78,7 +78,6 @@ void TextCodecMac::registerCodecs(TextCodecRegistrar registrar) TextCodecMac::TextCodecMac(TECTextEncodingID encoding) : m_encoding(encoding) - , m_error(false) , m_numBufferedBytes(0) , m_converterTEC(0) { @@ -179,16 +178,15 @@ OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferL } // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus. 
- if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) { + if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) status = kTECOutputBufferFullStatus; - } inputLength = bytesRead; outputLength = bytesWritten; return status; } -String TextCodecMac::decode(const char* bytes, size_t length, bool flush) +String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) { // Get a converter for the passed-in encoding. if (!m_converterTEC && createTECConverter() != noErr) @@ -201,7 +199,7 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush) bool bufferWasFull = false; UniChar buffer[ConversionBufferSize]; - while (sourceLength || bufferWasFull) { + while ((sourceLength || bufferWasFull) && !sawError) { int bytesRead = 0; int bytesWritten = 0; OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten); @@ -217,6 +215,10 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush) case kTextUndefinedElementErr: // FIXME: Put FFFD character into the output string in this case? 
TECClearConverterContextInfo(m_converterTEC); + if (stopOnError) { + sawError = true; + break; + } if (sourceLength) { sourcePointer += 1; sourceLength -= 1; @@ -236,13 +238,12 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush) break; } default: - LOG_ERROR("text decoding failed with error %ld", static_cast<long>(status)); - m_error = true; + sawError = true; return String(); } ASSERT(!(bytesWritten % sizeof(UChar))); - appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar)); + result.append(buffer, bytesWritten / sizeof(UChar)); bufferWasFull = status == kTECOutputBufferFullStatus; } @@ -251,7 +252,7 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush) unsigned long bytesWritten = 0; TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten); ASSERT(!(bytesWritten % sizeof(UChar))); - appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar)); + result.append(buffer, bytesWritten / sizeof(UChar)); } String resultString = String::adopt(result); @@ -266,7 +267,7 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush) return resultString; } -CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowEntities) +CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling) { // FIXME: We should really use TEC here instead of CFString for consistency with the other direction. @@ -280,7 +281,7 @@ CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowE CFIndex charactersLeft = CFStringGetLength(cfs); Vector<char> result; size_t size = 0; - UInt8 lossByte = allowEntities ? 0 : '?'; + UInt8 lossByte = handling == QuestionMarksForUnencodables ? '?' 
: 0; while (charactersLeft > 0) { CFRange range = CFRangeMake(startPos, charactersLeft); CFIndex bufferLength; @@ -303,11 +304,10 @@ CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowE ++charactersConverted; } } - char entityBuffer[16]; - sprintf(entityBuffer, "&#%u;", badChar); - size_t entityLength = strlen(entityBuffer); + UnencodableReplacementArray entity; + int entityLength = getUnencodableReplacement(badChar, handling, entity); result.grow(size + entityLength); - memcpy(result.data() + size, entityBuffer, entityLength); + memcpy(result.data() + size, entity, entityLength); size += entityLength; } diff --git a/WebCore/platform/text/mac/TextCodecMac.h b/WebCore/platform/text/mac/TextCodecMac.h index 639e214..aee4a97 100644 --- a/WebCore/platform/text/mac/TextCodecMac.h +++ b/WebCore/platform/text/mac/TextCodecMac.h @@ -43,8 +43,8 @@ namespace WebCore { explicit TextCodecMac(TECTextEncodingID); virtual ~TextCodecMac(); - virtual String decode(const char*, size_t length, bool flush = false); - virtual CString encode(const UChar*, size_t length, bool allowEntities = false); + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); private: OSStatus decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength, @@ -55,7 +55,6 @@ namespace WebCore { TECTextEncodingID m_encoding; UChar m_backslashAsCurrencySymbol; - bool m_error; unsigned m_numBufferedBytes; unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character mutable TECObjectRef m_converterTEC; diff --git a/WebCore/platform/text/mac/mac-encodings.txt b/WebCore/platform/text/mac/mac-encodings.txt index 270c625..bb45e22 100644 --- a/WebCore/platform/text/mac/mac-encodings.txt +++ b/WebCore/platform/text/mac/mac-encodings.txt @@ -22,15 +22,12 @@ JIS_X0208_90: JIS_X0208-1990 JIS_X0212_90: JIS_X0212-1990 KOI8_U: KOI8-U 
MacArabic: x-mac-arabic -MacCentralEurRoman: x-mac-centraleurroman, xmacce MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese MacChineseTrad: x-mac-chinesetrad, xmactradchinese MacCroatian: x-mac-croatian -MacCyrillic: x-mac-cyrillic, maccyrillic, xmacukrainian MacDevanagari: x-mac-devanagari MacDingbats: x-mac-dingbats MacFarsi: x-mac-farsi -MacGreek: x-mac-greek MacGujarati: x-mac-gujarati MacGurmukhi: x-mac-gurmukhi MacHebrew: x-mac-hebrew @@ -42,7 +39,6 @@ MacRomanian: x-mac-romanian MacSymbol: x-mac-symbol MacThai: x-mac-thai MacTibetan: x-mac-tibetan -MacTurkish: x-mac-turkish MacVT100: x-mac-vt100 NextStepLatin: x-nextstep ShiftJIS_X0213_00: Shift_JIS_X0213-2000 diff --git a/WebCore/platform/text/qt/StringQt.cpp b/WebCore/platform/text/qt/StringQt.cpp index 23a684b..de9f527 100644 --- a/WebCore/platform/text/qt/StringQt.cpp +++ b/WebCore/platform/text/qt/StringQt.cpp @@ -26,7 +26,6 @@ #include "config.h" #include "PlatformString.h" -#include "DeprecatedString.h" #include <QString> @@ -44,21 +43,14 @@ String::String(const QStringRef& ref) { if (!ref.string()) return; - m_impl = StringImpl::create(reinterpret_cast<const UChar *>(ref.unicode()), ref.length()); + m_impl = StringImpl::create(reinterpret_cast<const UChar*>(ref.unicode()), ref.length()); } - String::operator QString() const { return QString(reinterpret_cast<const QChar*>(characters()), length()); } -// DeprecatedString conversions -DeprecatedString::operator QString() const -{ - return QString(reinterpret_cast<const QChar*>(unicode()), length()); -} - } // vim: ts=4 sw=4 et diff --git a/WebCore/platform/text/qt/TextCodecQt.cpp b/WebCore/platform/text/qt/TextCodecQt.cpp index 888c6af..0f385dd 100644 --- a/WebCore/platform/text/qt/TextCodecQt.cpp +++ b/WebCore/platform/text/qt/TextCodecQt.cpp @@ -1,5 +1,6 @@ /* * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> + * Copyright (C) 2008 Holger Hans Peter Freyther * * Redistribution and use in source and binary forms, with or without * modification, 
are permitted provided that the following conditions @@ -91,9 +92,10 @@ TextCodecQt::~TextCodecQt() } -String TextCodecQt::decode(const char* bytes, size_t length, bool flush) +String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError) { QString unicode = m_codec->toUnicode(bytes, length, &m_state); + sawError = m_state.invalidChars != 0; if (flush) { m_state.flags = QTextCodec::DefaultConversion; @@ -104,12 +106,12 @@ String TextCodecQt::decode(const char* bytes, size_t length, bool flush) return unicode; } -CString TextCodecQt::encode(const UChar* characters, size_t length, bool allowEntities) +CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling) { if (!length) return ""; - // FIXME: do something sensible with allowEntities + // FIXME: do something sensible with UnencodableHandling QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, 0); return CString(ba.constData(), ba.length()); diff --git a/WebCore/platform/text/qt/TextCodecQt.h b/WebCore/platform/text/qt/TextCodecQt.h index 9bbb80b..f28f0bb 100644 --- a/WebCore/platform/text/qt/TextCodecQt.h +++ b/WebCore/platform/text/qt/TextCodecQt.h @@ -30,8 +30,6 @@ #include "TextEncoding.h" #include <QTextCodec> -class QTextCodec; - namespace WebCore { class TextCodecQt : public TextCodec { @@ -42,8 +40,8 @@ namespace WebCore { TextCodecQt(const TextEncoding&); virtual ~TextCodecQt(); - virtual String decode(const char*, size_t length, bool flush = false); - virtual CString encode(const UChar*, size_t length, bool allowEntities = false); + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); private: TextEncoding m_encoding; diff --git a/WebCore/platform/text/wx/StringWx.cpp b/WebCore/platform/text/wx/StringWx.cpp index 7f91dbf..50919c4 100644 --- 
a/WebCore/platform/text/wx/StringWx.cpp +++ b/WebCore/platform/text/wx/StringWx.cpp @@ -27,7 +27,6 @@ #include "PlatformString.h" #include "CString.h" -#include "DeprecatedString.h" #include "unicode/ustring.h" #include <wx/defs.h> @@ -88,12 +87,6 @@ String::operator wxString() const return wxString(utf8().data(), wxConvUTF8); } -// DeprecatedString conversions -DeprecatedString::operator wxString() const -{ - return wxString(utf8().data(), wxConvUTF8); -} - } // vim: ts=4 sw=4 et |