diff options
author | Steve Block <steveblock@google.com> | 2011-05-06 11:45:16 +0100 |
---|---|---|
committer | Steve Block <steveblock@google.com> | 2011-05-12 13:44:10 +0100 |
commit | cad810f21b803229eb11403f9209855525a25d57 (patch) | |
tree | 29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /Source/WebCore/platform/text/wince | |
parent | 121b0cf4517156d0ac5111caf9830c51b69bae8f (diff) | |
download | external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2 |
Merge WebKit at r75315: Initial merge by git.
Change-Id: I570314b346ce101c935ed22a626b48c2af266b84
Diffstat (limited to 'Source/WebCore/platform/text/wince')
3 files changed, 765 insertions, 0 deletions
diff --git a/Source/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp b/Source/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp new file mode 100644 index 0000000..96488c0 --- /dev/null +++ b/Source/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp @@ -0,0 +1,303 @@ +/* + * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ * + */ + +#include "config.h" +#include "TextBreakIterator.h" + +#include "PlatformString.h" +#include <wtf/StdLibExtras.h> +#include <wtf/unicode/Unicode.h> + +using namespace WTF::Unicode; + +namespace WebCore { + +// Hack, not entirely correct +static inline bool isCharStop(UChar c) +{ + CharCategory charCategory = category(c); + return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00)); +} + +static inline bool isLineStop(UChar c) +{ + return category(c) != Separator_Line; +} + +static inline bool isSentenceStop(UChar c) +{ + return isPunct(c); +} + +class TextBreakIterator { +public: + void reset(const UChar* str, int len) + { + string = str; + length = len; + currentPos = 0; + } + int first() + { + currentPos = 0; + return currentPos; + } + int last() + { + currentPos = length; + return currentPos; + } + virtual int next() = 0; + virtual int previous() = 0; + int following(int position) + { + currentPos = position; + return next(); + } + int preceding(int position) + { + currentPos = position; + return previous(); + } + + int currentPos; + const UChar* string; + int length; +}; + +struct WordBreakIterator: TextBreakIterator { + virtual int next(); + virtual int previous(); +}; + +struct CharBreakIterator: TextBreakIterator { + virtual int next(); + virtual int previous(); +}; + +struct LineBreakIterator: TextBreakIterator { + virtual int next(); + virtual int previous(); +}; + +struct SentenceBreakIterator : TextBreakIterator { + virtual int next(); + virtual int previous(); +}; + +int WordBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos < length) { + if (haveSpace && !isSpace(string[currentPos])) + break; + if (isSpace(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int WordBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool 
haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isSpace(string[currentPos])) + break; + if (isSpace(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +int CharBreakIterator::next() +{ + if (currentPos >= length) + return -1; + ++currentPos; + while (currentPos < length && !isCharStop(string[currentPos])) + ++currentPos; + return currentPos; +} + +int CharBreakIterator::previous() +{ + if (currentPos <= 0) + return -1; + if (currentPos > length) + currentPos = length; + --currentPos; + while (currentPos > 0 && !isCharStop(string[currentPos])) + --currentPos; + return currentPos; +} + +int LineBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos < length) { + if (haveSpace && !isLineStop(string[currentPos])) + break; + if (isLineStop(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int LineBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isLineStop(string[currentPos])) + break; + if (isLineStop(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +int SentenceBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos < length) { + if (haveSpace && !isSentenceStop(string[currentPos])) + break; + if (isSentenceStop(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int SentenceBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isSentenceStop(string[currentPos])) + break; + if (isSentenceStop(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +TextBreakIterator* 
wordBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* characterBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* lineBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(LineBreakIterator , iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +int textBreakFirst(TextBreakIterator* breakIterator) +{ + return breakIterator->first(); +} + +int textBreakLast(TextBreakIterator* breakIterator) +{ + return breakIterator->last(); +} + +int textBreakNext(TextBreakIterator* breakIterator) +{ + return breakIterator->next(); +} + +int textBreakPrevious(TextBreakIterator* breakIterator) +{ + return breakIterator->previous(); +} + +int textBreakPreceding(TextBreakIterator* breakIterator, int position) +{ + return breakIterator->preceding(position); +} + +int textBreakFollowing(TextBreakIterator* breakIterator, int position) +{ + return breakIterator->following(position); +} + +int textBreakCurrent(TextBreakIterator* breakIterator) +{ + return breakIterator->currentPos; +} + +bool isTextBreak(TextBreakIterator*, int) +{ + return true; +} + +TextBreakIterator* cursorMovementIterator(const UChar* string, int length) +{ + return characterBreakIterator(string, length); +} + +} // namespace WebCore diff --git a/Source/WebCore/platform/text/wince/TextCodecWinCE.cpp b/Source/WebCore/platform/text/wince/TextCodecWinCE.cpp new file mode 100644 index 0000000..3532e74 --- /dev/null +++ b/Source/WebCore/platform/text/wince/TextCodecWinCE.cpp @@ -0,0 +1,389 @@ +/* + * Copyright (C) 2007-2009 
Torch Mobile, Inc. All rights reserved. + * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * This library is distributed in the hope that i will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "TextCodecWinCE.h" + +#include "FontCache.h" +#include "PlatformString.h" +#include <mlang.h> +#include <winbase.h> +#include <winnls.h> +#include <wtf/HashMap.h> +#include <wtf/HashSet.h> +#include <wtf/text/CString.h> +#include <wtf/text/StringConcatenate.h> +#include <wtf/text/StringHash.h> +#include <wtf/unicode/UTF8.h> + +namespace WebCore { + +struct CharsetInfo { + CString m_name; + String m_friendlyName; + UINT m_codePage; + Vector<CString> m_aliases; +}; + +class LanguageManager { +private: + LanguageManager(); + + friend LanguageManager& languageManager(); +}; + +// Usage: a lookup table used to get CharsetInfo with code page ID. +// Key: code page ID. Value: charset information. 
+static HashMap<UINT, CString>& codePageCharsets() +{ + static HashMap<UINT, CString> cc; + return cc; +} + +static HashMap<String, CharsetInfo>& knownCharsets() +{ + static HashMap<String, CharsetInfo> kc; + return kc; +} + +// Usage: a map that stores charsets that are supported by system. Sorted by name. +// Key: charset. Value: code page ID. +typedef HashSet<String> CharsetSet; +static CharsetSet& supportedCharsets() +{ + static CharsetSet sl; + return sl; +} + +static LanguageManager& languageManager() +{ + static LanguageManager lm; + return lm; +} + +LanguageManager::LanguageManager() +{ + IEnumCodePage* enumInterface; + IMultiLanguage* mli = FontCache::getMultiLanguageInterface(); + if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) { + MIMECPINFO cpInfo; + ULONG ccpInfo; + while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) { + if (!IsValidCodePage(cpInfo.uiCodePage)) + continue; + + HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage); + + CString name(String(cpInfo.wszWebCharset).latin1()); + if (i == codePageCharsets().end()) { + CharsetInfo info; + info.m_codePage = cpInfo.uiCodePage; + knownCharsets().set(name.data(), info); + i = codePageCharsets().set(cpInfo.uiCodePage, name).first; + } + if (i != codePageCharsets().end()) { + HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length())); + ASSERT(j != knownCharsets().end()); + CharsetInfo& info = j->second; + info.m_name = i->second.data(); + info.m_friendlyName = cpInfo.wszDescription; + info.m_aliases.append(name); + info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1()); + info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1()); + String cpName = makeString("cp", String::number(cpInfo.uiCodePage)); + info.m_aliases.append(cpName.latin1()); + supportedCharsets().add(i->second.data()); + } + } + enumInterface->Release(); + } +} + +static UINT getCodePage(const char* 
name) +{ + if (!strcmp(name, "UTF-8")) + return CP_UTF8; + + // Explicitly use a "const" reference to fix the silly VS build error + // saying "==" is not found for const_iterator and iterator + const HashMap<String, CharsetInfo>& charsets = knownCharsets(); + HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name); + return i == charsets.end() ? CP_ACP : i->second.m_codePage; +} + +static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*) +{ + return new TextCodecWinCE(getCodePage(encoding.name())); +} + +TextCodecWinCE::TextCodecWinCE(UINT codePage) + : m_codePage(codePage) +{ +} + +TextCodecWinCE::~TextCodecWinCE() +{ +} + +void TextCodecWinCE::registerBaseEncodingNames(EncodingNameRegistrar registrar) +{ + registrar("UTF-8", "UTF-8"); +} + +void TextCodecWinCE::registerBaseCodecs(TextCodecRegistrar registrar) +{ + registrar("UTF-8", newTextCodecWinCE, 0); +} + +void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar) +{ + languageManager(); + for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { + HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); + if (j != knownCharsets().end()) { + registrar(j->second.m_name.data(), j->second.m_name.data()); + for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias) + registrar(alias->data(), j->second.m_name.data()); + } + } +} + +void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar) +{ + languageManager(); + for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { + HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); + if (j != knownCharsets().end()) + registrar(j->second.m_name.data(), newTextCodecWinCE, 0); + } +} + +static DWORD getCodePageFlags(UINT codePage) +{ + if (codePage == CP_UTF8) + return MB_ERR_INVALID_CHARS; + + if (codePage == 42) // 
Symbol + return 0; + + // Microsoft says the flag must be 0 for the following code pages + if (codePage > 50000) { + if ((codePage >= 50220 && codePage <= 50222) + || codePage == 50225 + || codePage == 50227 + || codePage == 50229 + || codePage == 52936 + || codePage == 54936 + || (codePage >= 57002 && codePage <= 57011) // ISCII code pages + || codePage == 65000 // UTF-7 + ) + return 0; + } + + return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS; +} + +static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length) +{ + for (const char* bytesEnd = bytes + length; bytes < bytesEnd; ++bytes) { + if (*bytes & 0x80) + break; + } + return bytes; +} + +static void decode(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left, bool canBeFirstTime, bool& sawInvalidChar) +{ + *left = length; + if (!bytes || !length) + return; + + DWORD flags = getCodePageFlags(codePage); + + if (codePage == CP_UTF8) { + if (canBeFirstTime) { + // Handle BOM. + if (length > 3) { + if (bytes[0] == (char)0xEF && bytes[1] == (char)0xBB && bytes[2] == (char)0xBF) { + // BOM found! + length -= 3; + bytes += 3; + *left = length; + } + } else if (bytes[0] == (char)0xEF && (length < 2 || bytes[1] == (char)0xBB) && (length < 3 || bytes[2] == (char)0xBF)) { // At most 3 bytes forming a complete or partial BOM; the cast keeps the comparison valid when char is signed. + if (length == 3) + *left = 0; + return; + } + } + + // Process ASCII characters at beginning. 
+ const char* firstNonAsciiChar = findFirstNonAsciiCharacter(bytes, length); + int numAsciiCharacters = firstNonAsciiChar - bytes; + if (numAsciiCharacters) { + result.append(bytes, numAsciiCharacters); + length -= numAsciiCharacters; + if (!length) { + *left = 0; + return; + } + bytes = firstNonAsciiChar; + } + + int oldSize = result.size(); + result.resize(oldSize + length); + UChar* resultStart = result.data() + oldSize; + const char* sourceStart = bytes; + const char* const sourceEnd = bytes + length; + for (;;) { + using namespace WTF::Unicode; + ConversionResult convRes = convertUTF8ToUTF16(&sourceStart + , sourceEnd + , &resultStart + , result.data() + result.size() + , true); + + // FIXME: is it possible? + if (convRes == targetExhausted && sourceStart < sourceEnd) { + oldSize = result.size(); + result.resize(oldSize + 256); + resultStart = result.data() + oldSize; + continue; + } + + if (convRes != conversionOK) + sawInvalidChar = true; + + break; + } + + *left = sourceEnd - sourceStart; + result.resize(resultStart - result.data()); + } else { + int testLength = length; + int untestedLength = length; + for (;;) { + int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0); + + if (resultLength > 0) { + int oldSize = result.size(); + result.resize(oldSize + resultLength); + + MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength); + + if (testLength == untestedLength) { + *left = length - testLength; + break; + } + untestedLength -= testLength; + length -= testLength; + bytes += testLength; + } else { + untestedLength = testLength - 1; + if (!untestedLength) { + *left = length; + break; + } + } + testLength = (untestedLength + 1) / 2; + } + } +} + +String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) +{ + if (!m_decodeBuffer.isEmpty()) { + m_decodeBuffer.append(bytes, length); + bytes = m_decodeBuffer.data(); + length = 
m_decodeBuffer.size(); + } + + size_t left; + Vector<UChar, 8192> result; + for (;;) { + bool sawInvalidChar = false; + WebCore::decode(result, m_codePage, bytes, length, &left, m_decodeBuffer.isEmpty(), sawInvalidChar); + if (!left) + break; + + if (!sawInvalidChar && !flush && left < 16) + break; + + result.append(L'?'); + sawError = true; + if (stopOnError) + return String::adopt(result); + + if (left == 1) + break; + + bytes += length - left + 1; + length = left - 1; + } + if (left && !flush) { + if (m_decodeBuffer.isEmpty()) + m_decodeBuffer.append(bytes + length - left, left); + else { + memmove(m_decodeBuffer.data(), bytes + length - left, left); + m_decodeBuffer.resize(left); + } + } else + m_decodeBuffer.clear(); + + return String::adopt(result); +} + +CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling) +{ + if (!characters || !length) + return CString(); + + DWORD flags = m_codePage == CP_UTF8 ? 0 : WC_COMPOSITECHECK; + + int resultLength = WideCharToMultiByte(m_codePage, flags, characters, length, 0, 0, 0, 0); + + // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables. 
+ + if (resultLength <= 0) + return "?"; + + char* characterBuffer; + CString result = CString::newUninitialized(resultLength, characterBuffer); + + WideCharToMultiByte(m_codePage, flags, characters, length, characterBuffer, resultLength, 0, 0); + + return result; +} + +void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver) +{ + languageManager(); + for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { + HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); + if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage)) + break; + } +} + +} // namespace WebCore diff --git a/Source/WebCore/platform/text/wince/TextCodecWinCE.h b/Source/WebCore/platform/text/wince/TextCodecWinCE.h new file mode 100644 index 0000000..8d332a6 --- /dev/null +++ b/Source/WebCore/platform/text/wince/TextCodecWinCE.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved. + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TextCodecWinCE_h +#define TextCodecWinCE_h + +#include "PlatformString.h" +#include "TextCodec.h" +#include "TextEncoding.h" +#include <wtf/Vector.h> +#include <windows.h> + +namespace WebCore { + +class TextCodecWinCE : public TextCodec { +public: + static void registerBaseEncodingNames(EncodingNameRegistrar); + static void registerBaseCodecs(TextCodecRegistrar); + + static void registerExtendedEncodingNames(EncodingNameRegistrar); + static void registerExtendedCodecs(TextCodecRegistrar); + + TextCodecWinCE(UINT codePage); + virtual ~TextCodecWinCE(); + + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); + + struct EncodingInfo { + String m_encoding; + String m_friendlyName; + }; + + struct EncodingReceiver { + // Return false to stop enumerating. + virtual bool receive(const char* encoding, const wchar_t* friendlyName, unsigned int codePage) = 0; + }; + + static void enumerateSupportedEncodings(EncodingReceiver& receiver); + +private: + UINT m_codePage; + Vector<char> m_decodeBuffer; +}; + +} // namespace WebCore + +#endif // TextCodecWinCE_h |