diff options
Diffstat (limited to 'WebCore/platform/text/TextEncoding.cpp')
-rw-r--r-- | WebCore/platform/text/TextEncoding.cpp | 213 |
1 files changed, 0 insertions, 213 deletions
diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp deleted file mode 100644 index 9026049..0000000 --- a/WebCore/platform/text/TextEncoding.cpp +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved. - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextEncoding.h" - -#include "CString.h" -#include "PlatformString.h" -#include "TextCodec.h" -#include "TextDecoder.h" -#include "TextEncodingRegistry.h" -#if USE(ICU_UNICODE) -#include <unicode/unorm.h> -#elif USE(QT4_UNICODE) -#include <QString> -#endif -#include <wtf/HashSet.h> -#include <wtf/OwnPtr.h> - -namespace WebCore { - -static void addEncodingName(HashSet<const char*>& set, const char* name) -{ - const char* atomicName = atomicCanonicalTextEncodingName(name); - if (atomicName) - set.add(atomicName); -} - -TextEncoding::TextEncoding(const char* name) - : m_name(atomicCanonicalTextEncodingName(name)) -{ -} - -TextEncoding::TextEncoding(const String& name) - : m_name(atomicCanonicalTextEncodingName(name.characters(), name.length())) -{ -} - -String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const -{ - if (!m_name) - return String(); - - return TextDecoder(*this).decode(data, length, true, stopOnError, sawError); -} - -CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const -{ - if (!m_name) - return CString(); - - if (!length) - return ""; - -#if USE(ICU_UNICODE) - // FIXME: What's the right place to do normalization? - // It's a little strange to do it inside the encode function. - // Perhaps normalization should be an explicit step done before calling encode. - - const UChar* source = characters; - size_t sourceLength = length; - - Vector<UChar> normalizedCharacters; - - UErrorCode err = U_ZERO_ERROR; - if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) { - // First try using the length of the original string, since normalization to NFC rarely increases length. - normalizedCharacters.grow(sourceLength); - int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err); - if (err == U_BUFFER_OVERFLOW_ERROR) { - err = U_ZERO_ERROR; - normalizedCharacters.resize(normalizedLength); - normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err); - } - ASSERT(U_SUCCESS(err)); - - source = normalizedCharacters.data(); - sourceLength = normalizedLength; - } - return newTextCodec(*this)->encode(source, sourceLength, handling); -#elif USE(QT4_UNICODE) - QString str(reinterpret_cast<const QChar*>(characters), length); - str = str.normalized(QString::NormalizationForm_C); - return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling); -#endif -} - -bool TextEncoding::usesVisualOrdering() const -{ - if (noExtendedTextEncodingNameUsed()) - return false; - - static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); - return m_name == a; -} - -bool TextEncoding::isJapanese() const -{ - if (noExtendedTextEncodingNameUsed()) - return false; - - static HashSet<const char*> set; - if (set.isEmpty()) { - addEncodingName(set, "x-mac-japanese"); - addEncodingName(set, "cp932"); - addEncodingName(set, "JIS_X0201"); - addEncodingName(set, "JIS_X0208-1983"); - addEncodingName(set, "JIS_X0208-1990"); - addEncodingName(set, "JIS_X0212-1990"); - addEncodingName(set, "JIS_C6226-1978"); - addEncodingName(set, "Shift_JIS_X0213-2000"); - addEncodingName(set, "ISO-2022-JP"); - addEncodingName(set, "ISO-2022-JP-2"); - addEncodingName(set, "ISO-2022-JP-1"); - addEncodingName(set, "ISO-2022-JP-3"); - addEncodingName(set, "EUC-JP"); - addEncodingName(set, "Shift_JIS"); - } - return m_name && set.contains(m_name); -} - -UChar TextEncoding::backslashAsCurrencySymbol() const -{ - if (noExtendedTextEncodingNameUsed()) - return '\\'; - - // The text encodings below treat backslash as a currency symbol. - // See http://blogs.msdn.com/michkap/archive/2005/09/17/469941.aspx for more information. - static const char* const a = atomicCanonicalTextEncodingName("Shift_JIS_X0213-2000"); - static const char* const b = atomicCanonicalTextEncodingName("EUC-JP"); - return (m_name == a || m_name == b) ? 0x00A5 : '\\'; -} - -const TextEncoding& TextEncoding::closest8BitEquivalent() const -{ - if (*this == UTF16BigEndianEncoding() || *this == UTF16LittleEndianEncoding()) - return UTF8Encoding(); - return *this; -} - -const TextEncoding& ASCIIEncoding() -{ - static TextEncoding globalASCIIEncoding("ASCII"); - return globalASCIIEncoding; -} - -const TextEncoding& Latin1Encoding() -{ - static TextEncoding globalLatin1Encoding("Latin-1"); - return globalLatin1Encoding; -} - -const TextEncoding& UTF16BigEndianEncoding() -{ - static TextEncoding globalUTF16BigEndianEncoding("UTF-16BE"); - return globalUTF16BigEndianEncoding; -} - -const TextEncoding& UTF16LittleEndianEncoding() -{ - static TextEncoding globalUTF16LittleEndianEncoding("UTF-16LE"); - return globalUTF16LittleEndianEncoding; -} - -const TextEncoding& UTF32BigEndianEncoding() -{ - static TextEncoding globalUTF32BigEndianEncoding("UTF-32BE"); - return globalUTF32BigEndianEncoding; -} - -const TextEncoding& UTF32LittleEndianEncoding() -{ - static TextEncoding globalUTF32LittleEndianEncoding("UTF-32LE"); - return globalUTF32LittleEndianEncoding; -} - - -const TextEncoding& UTF8Encoding() -{ - static TextEncoding globalUTF8Encoding("UTF-8"); - return globalUTF8Encoding; -} - -const TextEncoding& WindowsLatin1Encoding() -{ - static TextEncoding globalWindowsLatin1Encoding("WinLatin-1"); - return globalWindowsLatin1Encoding; -} - -} // namespace WebCore |