diff options
Diffstat (limited to 'WebCore/platform/text/TextDecoder.cpp')
-rw-r--r-- | WebCore/platform/text/TextDecoder.cpp | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/WebCore/platform/text/TextDecoder.cpp b/WebCore/platform/text/TextDecoder.cpp new file mode 100644 index 0000000..e39a6b7 --- /dev/null +++ b/WebCore/platform/text/TextDecoder.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "TextDecoder.h" + +#include "TextEncodingRegistry.h" + +// FIXME: Would be nice to also handle BOM for UTF-7 and UTF-32. + +namespace WebCore { + +TextDecoder::TextDecoder(const TextEncoding& encoding) + : m_encoding(encoding) + , m_checkedForBOM(false) + , m_numBufferedBytes(0) +{ +} + +void TextDecoder::reset(const TextEncoding& encoding) +{ + m_encoding = encoding; + m_codec.clear(); + m_checkedForBOM = false; + m_numBufferedBytes = 0; +} + +String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError) +{ + ASSERT(!m_checkedForBOM); + + // Check to see if we found a BOM. + size_t numBufferedBytes = m_numBufferedBytes; + size_t buf1Len = numBufferedBytes; + size_t buf2Len = length; + const unsigned char* buf1 = m_bufferedBytes; + const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); + unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; + unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; + unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; + unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; + + const TextEncoding* encodingConsideringBOM = &m_encoding; + bool foundBOM = true; + size_t lengthOfBOM = 0; + if (c1 == 0xFF && c2 == 0xFE) { + if (c3 != 0 || c4 != 0) { + encodingConsideringBOM = &UTF16LittleEndianEncoding(); + lengthOfBOM = 2; + } else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) { + encodingConsideringBOM = &UTF32LittleEndianEncoding(); + lengthOfBOM = 4; + } else + foundBOM = false; + } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { + encodingConsideringBOM = &UTF8Encoding(); + lengthOfBOM = 3; + } else if (c1 == 0xFE && c2 == 0xFF) { + encodingConsideringBOM = &UTF16BigEndianEncoding(); + lengthOfBOM = 2; + } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) { + encodingConsideringBOM = &UTF32BigEndianEncoding(); + lengthOfBOM = 4; + } else + foundBOM = false; + + if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) { + // Continue to look for the BOM. + memcpy(&m_bufferedBytes[numBufferedBytes], data, length); + m_numBufferedBytes += length; + return ""; + } + + // Done checking for BOM. + m_codec.set(newTextCodec(*encodingConsideringBOM).release()); + if (!m_codec) + return String(); + m_checkedForBOM = true; + + // Skip the BOM. + if (foundBOM) { + ASSERT(numBufferedBytes < lengthOfBOM); + size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes; + ASSERT(numUnbufferedBOMBytes <= length); + + data += numUnbufferedBOMBytes; + length -= numUnbufferedBOMBytes; + numBufferedBytes = 0; + m_numBufferedBytes = 0; + } + + // Handle case where we have some buffered bytes to deal with. + if (numBufferedBytes) { + char bufferedBytes[sizeof(m_bufferedBytes)]; + memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes); + m_numBufferedBytes = 0; + + String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError); + if (stopOnError && sawError) + return bufferedResult; + return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError); + } + + return m_codec->decode(data, length, flush, stopOnError, sawError); +} + +} // namespace WebCore |