diff options
author | Kristian Monsen <kristianm@google.com> | 2010-06-28 16:42:48 +0100 |
---|---|---|
committer | Kristian Monsen <kristianm@google.com> | 2010-07-02 10:29:56 +0100 |
commit | 06ea8e899e48f1f2f396b70e63fae369f2f23232 (patch) | |
tree | 20c1428cd05c76f32394ab354ea35ed99acd86d8 /WebCore/platform/text/wince | |
parent | 72aad67af14193199e29cdd5c4ddc095a8b9a8a8 (diff) | |
download | external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.zip external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.tar.gz external_webkit-06ea8e899e48f1f2f396b70e63fae369f2f23232.tar.bz2 |
Merge WebKit at r61871: Initial merge by git.
Change-Id: I6cff43abca9cc4782e088a469ad4f03f166a65d5
Diffstat (limited to 'WebCore/platform/text/wince')
-rw-r--r-- | WebCore/platform/text/wince/TextCodecWinCE.cpp | 483 | ||||
-rw-r--r-- | WebCore/platform/text/wince/TextCodecWinCE.h | 71 |
2 files changed, 554 insertions, 0 deletions
diff --git a/WebCore/platform/text/wince/TextCodecWinCE.cpp b/WebCore/platform/text/wince/TextCodecWinCE.cpp new file mode 100644 index 0000000..644b12f --- /dev/null +++ b/WebCore/platform/text/wince/TextCodecWinCE.cpp @@ -0,0 +1,483 @@ +/* + * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * This library is distributed in the hope that i will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "TextCodecWinCE.h" + +#include "ce_textcodecs.h" +#include "FontCache.h" +#include "PlatformString.h" +#include "StringHash.h" +#include <mlang.h> +#include <winbase.h> +#include <winnls.h> +#include <wtf/HashMap.h> +#include <wtf/HashSet.h> +#include <wtf/text/CString.h> +#include <wtf/unicode/UTF8.h> + +namespace WebCore { + +struct CharsetInfo { + CString m_name; + String m_friendlyName; + UINT m_codePage; + Vector<CString> m_aliases; + bool m_usesNativeCodec; +}; + +class LanguageManager { +private: + LanguageManager(); + + friend LanguageManager& languageManager(); +}; + +// Usage: a lookup table used to get CharsetInfo with code page ID. +// Key: code page ID. Value: charset information. +static HashMap<UINT, CString>& codePageCharsets() +{ + static HashMap<UINT, CString> cc; + return cc; +} + +static HashMap<String, CharsetInfo>& knownCharsets() +{ + static HashMap<String, CharsetInfo> kc; + return kc; +} + +// Usage: a map that stores charsets that are supported by system. Sorted by name. +// Key: charset. Value: code page ID. +typedef HashSet<String> CharsetSet; +static CharsetSet& supportedCharsets() +{ + static CharsetSet sl; + return sl; +} + +static LanguageManager& languageManager() +{ + static LanguageManager lm; + return lm; +} + +static void addCharset(UINT codePage, const char* charsetName, const wchar_t* friendlyName, const char* charsetAliases, bool nativeSupport = false) +{ + CharsetInfo info; + info.m_codePage = codePage; + info.m_name = charsetName; + info.m_friendlyName = friendlyName; + info.m_usesNativeCodec = nativeSupport; + const char* begin = charsetAliases; + for (;;) { + const char* end = strchr(begin, '|'); + CString alias = end ? CString(begin, end - begin) : begin; + if (alias.length()) + info.m_aliases.append(alias); + if (!end) + break; + begin = end + 1; + } + knownCharsets().set(info.m_name.data(), info); + if (codePage != CP_ACP) + codePageCharsets().set(codePage, info.m_name); +} + +LanguageManager::LanguageManager() +{ + // 437, 708, 709, 710, 720, 737, 775, 850, 852 + addCharset(932, "SHIFT_JIS", L"Japanese (SHIFT_JIS)", "shift_jis"); + addCharset(936, "GBK", L"Chinese Simplified (GBK)", "gbk|gb2312"); + addCharset(949, "KSC5601", L"Korean (KSC5601)", "ks_c_5601-1987|ksc5601|euc-kr|euckr|x-euc-kr"); + addCharset(950, "BIG5", L"Chinese Traditional (BIG5)", "big5"); + addCharset(1361, "JOHAB", L"Korean (Johab)", "johab|korean.johab"); + addCharset(51932, "EUC-JP", L"Japanese (EUC)", "euc-jp|eucjp|x-euc-jp", true); + addCharset(874, "CP874", L"Thai (Windows)", "cp874|windows-874", true); + addCharset(CP_ACP, "TIS620", L"Thai (TIS 620)", "tis620|ISO-8859-11|ISO-IR-166|TIS-620|TIS620-0TIS620.2529-1|TIS620.2533-0|TIS620.2533-1|thai8", true); + addCharset(CP_ACP, "MACTHAI", L"Thai (Mac OS)", "macthai|x-mac-thai|mac-thai", true); + supportedCharsets().add("EUC-JP"); + supportedCharsets().add("CP874"); + supportedCharsets().add("TIS620"); + supportedCharsets().add("MACTHAI"); + + IEnumCodePage* enumInterface; + IMultiLanguage* mli = FontCache::getMultiLanguageInterface(); + if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) { + MIMECPINFO cpInfo; + ULONG ccpInfo; + while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) { + if (!IsValidCodePage(cpInfo.uiCodePage)) + continue; + + HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage); + + CString name(String(cpInfo.wszWebCharset).latin1()); + if (i == codePageCharsets().end()) { + CharsetInfo info; + info.m_codePage = cpInfo.uiCodePage; + knownCharsets().set(name.data(), info); + i = codePageCharsets().set(cpInfo.uiCodePage, name).first; + } + if (i != codePageCharsets().end()) { + HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length())); + ASSERT(j != knownCharsets().end()); + CharsetInfo& info = j->second; + info.m_name = i->second.data(); + info.m_friendlyName = cpInfo.wszDescription; + info.m_aliases.append(name); + info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1()); + info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1()); + info.m_usesNativeCodec = false; + String cpName = String::format("cp%d", cpInfo.uiCodePage); + info.m_aliases.append(cpName.latin1()); + supportedCharsets().add(i->second.data()); + } + } + enumInterface->Release(); + } +} + +static UINT getCodePage(const char* name) +{ + if (!strcmp(name, "UTF-8")) + return CP_UTF8; + + // Explicitly use a "const" reference to fix the silly VS build error + // saying "==" is not found for const_iterator and iterator + const HashMap<String, CharsetInfo>& charsets = knownCharsets(); + HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name); + return i == charsets.end() ? CP_ACP : i->second.m_codePage; +} + +static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*) +{ + return new TextCodecWinCE(encoding); +} + +TextCodecWinCE::TextCodecWinCE(const TextEncoding& encoding) + : m_encoding(encoding) +{ +} + +TextCodecWinCE::~TextCodecWinCE() +{ +} + +void TextCodecWinCE::registerBaseEncodingNames(EncodingNameRegistrar registrar) +{ + registrar("UTF-8", "UTF-8"); +} + +void TextCodecWinCE::registerBaseCodecs(TextCodecRegistrar registrar) +{ + registrar("UTF-8", newTextCodecWinCE, 0); +} + +void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar) +{ + languageManager(); + for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { + HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); + if (j != knownCharsets().end()) { + registrar(j->second.m_name.data(), j->second.m_name.data()); + for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias) + registrar(alias->data(), j->second.m_name.data()); + } + } +} + +void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar) +{ + languageManager(); + for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { + HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); + if (j != knownCharsets().end()) + registrar(j->second.m_name.data(), newTextCodecWinCE, 0); + } +} + +static DWORD getCodePageFlags(UINT codePage) +{ + if (codePage == CP_UTF8) + return MB_ERR_INVALID_CHARS; + + if (codePage == 42) // Symbol + return 0; + + // Microsoft says the flag must be 0 for the following code pages + if (codePage > 50000) { + if ((codePage >= 50220 && codePage <= 50222) + || codePage == 50225 + || codePage == 50227 + || codePage == 50229 + || codePage == 52936 + || codePage == 54936 + || (codePage >= 57002 && codePage <= 57001) + || codePage == 65000 // UTF-7 + ) + return 0; + } + + return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS; +} + +static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length) +{ + for (const char* bytesEnd = bytes + length; bytes < bytesEnd; ++bytes) { + if (*bytes & 0x80) + break; + } + return bytes; +} + +static void decode(Vector<UChar, 8192>& result, const char* encodingName, const char* bytes, size_t length, size_t* left, bool canBeFirstTime, bool& sawInvalidChar) +{ + *left = length; + if (!bytes || !length) + return; + + UINT codePage; + + HashMap<String, CharsetInfo>::iterator i = knownCharsets().find(encodingName); + if (i == knownCharsets().end()) { + if (!strcmp(encodingName, "UTF-8")) + codePage = CP_UTF8; + else + codePage = CP_ACP; + } else { + codePage = i->second.m_codePage; + if (i->second.m_usesNativeCodec) { + typedef int (*FuncEucMbToWc)(wchar_t *pwc, const unsigned char *s, int n); + FuncEucMbToWc encMbToWc = 0; + if (!strcmp(encodingName, "EUC-JP")) + encMbToWc = TextCodecsCE::euc_jp_mbtowc; + else if (!strcmp(encodingName, "CP874")) + encMbToWc = TextCodecsCE::cp874_mbtowc; + else if (!strcmp(encodingName, "TIS620")) + encMbToWc = TextCodecsCE::tis620_mbtowc; + else if (!strcmp(encodingName, "MACTHAI")) + encMbToWc = TextCodecsCE::mac_thai_mbtowc; + + if (encMbToWc) { + const char* const srcStart = bytes; + const char* const srcEnd = bytes + length; + int lastSize = result.size(); + result.resize(lastSize + length); + for (;;) { + UChar* dst = result.data() + lastSize; + const UChar* const dstEnd = result.data() + result.size(); + for (; dst < dstEnd && bytes < srcEnd; ++dst) { + int numberEncoded = encMbToWc(dst, (const unsigned char*)bytes, srcEnd - bytes); + if (numberEncoded >= 0) + bytes += numberEncoded; + else { + if (numberEncoded == RET_ILSEQ) + sawInvalidChar = true; + break; + } + } + if (bytes == srcEnd || dst != dstEnd) { + *left = srcEnd - bytes; + result.resize(dst - result.data()); + return; + } + lastSize = result.size(); + result.resize(result.size() + 256); + } + } else { + *left = 0; + result.append(bytes, length); + return; + } + } + } + + DWORD flags = getCodePageFlags(codePage); + + if (codePage == CP_UTF8) { + if (canBeFirstTime) { + // Handle BOM. + if (length > 3) { + if (bytes[0] == (char)0xEF && bytes[1] == (char)0xBB && bytes[2] == (char)0xBF) { + // BOM found! + length -= 3; + bytes += 3; + *left = length; + } + } else if (bytes[0] == 0xEF && (length < 2 || bytes[1] == (char)0xBB) && (length < 3 || bytes[2] == (char)0xBF)) { + if (length == 3) + *left = 0; + return; + } + } + + // Process ASCII characters at beginning. + const char* firstNonAsciiChar = findFirstNonAsciiCharacter(bytes, length); + int numAsciiCharacters = firstNonAsciiChar - bytes; + if (numAsciiCharacters) { + result.append(bytes, numAsciiCharacters); + length -= numAsciiCharacters; + if (!length) { + *left = 0; + return; + } + bytes = firstNonAsciiChar; + } + + int oldSize = result.size(); + result.resize(oldSize + length); + UChar* resultStart = result.data() + oldSize; + const char* sourceStart = bytes; + const char* const sourceEnd = bytes + length; + for (;;) { + using namespace WTF::Unicode; + ConversionResult convRes = convertUTF8ToUTF16(&sourceStart + , sourceEnd + , &resultStart + , result.data() + result.size() + , true); + + // FIXME: is it possible? + if (convRes == targetExhausted && sourceStart < sourceEnd) { + oldSize = result.size(); + result.resize(oldSize + 256); + resultStart = result.data() + oldSize; + continue; + } + + if (convRes != conversionOK) + sawInvalidChar = true; + + break; + } + + *left = sourceEnd - sourceStart; + result.resize(resultStart - result.data()); + } else { + int testLength = length; + int untestedLength = length; + for (;;) { + int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0); + + if (resultLength > 0) { + int oldSize = result.size(); + result.resize(oldSize + resultLength); + + MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength); + + if (testLength == untestedLength) { + *left = length - testLength; + break; + } + untestedLength -= testLength; + length -= testLength; + bytes += testLength; + } else { + untestedLength = testLength - 1; + if (!untestedLength) { + *left = length; + break; + } + } + testLength = (untestedLength + 1) / 2; + } + } +} + +String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) +{ + if (!m_decodeBuffer.isEmpty()) { + m_decodeBuffer.append(bytes, length); + bytes = m_decodeBuffer.data(); + length = m_decodeBuffer.size(); + } + + size_t left; + Vector<UChar, 8192> result; + for (;;) { + bool sawInvalidChar = false; + WebCore::decode(result, m_encoding.name(), bytes, length, &left, m_decodeBuffer.isEmpty(), sawInvalidChar); + if (!left) + break; + + if (!sawInvalidChar && !flush && left < 16) + break; + + result.append(L'?'); + sawError = true; + if (stopOnError) + return String(result.data(), result.size()); + + + if (left == 1) + break; + + bytes += length - left + 1; + length = left - 1; + } + if (left && !flush) { + if (m_decodeBuffer.isEmpty()) + m_decodeBuffer.append(bytes + length - left, left); + else { + memmove(m_decodeBuffer.data(), bytes + length - left, left); + m_decodeBuffer.resize(left); + } + } else + m_decodeBuffer.clear(); + return String(result.data(), result.size()); +} + +CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling) +{ + if (!characters || !length) + return CString(); + + UINT codePage = getCodePage(m_encoding.name()); + DWORD flags = codePage == CP_UTF8 ? 0 : WC_COMPOSITECHECK; + + int resultLength = WideCharToMultiByte(codePage, flags, characters, length, 0, 0, 0, 0); + + // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables. + + if (resultLength <= 0) + return "?"; + + Vector<char> result(resultLength); + + WideCharToMultiByte(codePage, flags, characters, length, result.data(), resultLength, 0, 0); + + return CString(result.data(), result.size()); +} + +void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver) +{ + languageManager(); + for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { + HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); + if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage)) + break; + } +} + +} // namespace WebCore diff --git a/WebCore/platform/text/wince/TextCodecWinCE.h b/WebCore/platform/text/wince/TextCodecWinCE.h new file mode 100644 index 0000000..a870d3e --- /dev/null +++ b/WebCore/platform/text/wince/TextCodecWinCE.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved. + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TextCodecWinCE_h +#define TextCodecWinCE_h + +#include "PlatformString.h" +#include "TextCodec.h" +#include "TextEncoding.h" +#include <wtf/Vector.h> + +namespace WebCore { + +class TextCodecWinCE : public TextCodec { +public: + static void registerBaseEncodingNames(EncodingNameRegistrar); + static void registerBaseCodecs(TextCodecRegistrar); + + static void registerExtendedEncodingNames(EncodingNameRegistrar); + static void registerExtendedCodecs(TextCodecRegistrar); + + TextCodecWinCE(const TextEncoding&); + virtual ~TextCodecWinCE(); + + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); + + struct EncodingInfo { + String m_encoding; + String m_friendlyName; + }; + + struct EncodingReceiver { + // Return false to stop enumerating. + virtual bool receive(const char* encoding, const wchar_t* friendlyName, unsigned int codePage) = 0; + }; + + static void enumerateSupportedEncodings(EncodingReceiver& receiver); + +private: + TextEncoding m_encoding; + Vector<char> m_decodeBuffer; +}; + +} // namespace WebCore + +#endif // TextCodecWinCE_h |