summaryrefslogtreecommitdiffstats
path: root/JavaScriptCore/wtf/text/WTFString.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'JavaScriptCore/wtf/text/WTFString.cpp')
-rw-r--r--JavaScriptCore/wtf/text/WTFString.cpp97
1 files changed, 70 insertions, 27 deletions
diff --git a/JavaScriptCore/wtf/text/WTFString.cpp b/JavaScriptCore/wtf/text/WTFString.cpp
index 6c4de6e..7d44d21 100644
--- a/JavaScriptCore/wtf/text/WTFString.cpp
+++ b/JavaScriptCore/wtf/text/WTFString.cpp
@@ -36,6 +36,13 @@ namespace WTF {
using namespace Unicode;
+// Construct a string with UTF-16 data.
+String::String(const UChar* characters, unsigned length)
+ : m_impl(characters ? StringImpl::create(characters, length) : 0)
+{
+}
+
+// Construct a string with UTF-16 data, from a null-terminated source.
String::String(const UChar* str)
{
if (!str)
@@ -48,6 +55,18 @@ String::String(const UChar* str)
m_impl = StringImpl::create(str, len);
}
+// Construct a string with latin1 data.
+String::String(const char* characters, unsigned length)
+ : m_impl(characters ? StringImpl::create(characters, length) : 0)
+{
+}
+
+// Construct a string with latin1 data, from a null-terminated source.
+String::String(const char* characters)
+ : m_impl(characters ? StringImpl::create(characters) : 0)
+{
+}
+
void String::append(const String& str)
{
if (str.isEmpty())
@@ -226,6 +245,19 @@ String String::substring(unsigned pos, unsigned len) const
return m_impl->substring(pos, len);
}
+String String::substringSharingImpl(unsigned offset, unsigned length) const
+{
+ // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
+
+ unsigned stringLength = this->length();
+ offset = min(offset, stringLength);
+ length = min(length, stringLength - offset);
+
+ if (!offset && length == stringLength)
+ return *this;
+ return String(StringImpl::create(m_impl, offset, length));
+}
+
String String::lower() const
{
if (!m_impl)
@@ -557,14 +589,14 @@ void String::split(const String& separator, bool allowEmptyEntries, Vector<Strin
{
result.clear();
- int startPos = 0;
- int endPos;
- while ((endPos = find(separator, startPos)) != -1) {
+ unsigned startPos = 0;
+ size_t endPos;
+ while ((endPos = find(separator, startPos)) != notFound) {
if (allowEmptyEntries || startPos != endPos)
result.append(substring(startPos, endPos - startPos));
startPos = endPos + separator.length();
}
- if (allowEmptyEntries || startPos != static_cast<int>(length()))
+ if (allowEmptyEntries || startPos != length())
result.append(substring(startPos));
}
@@ -577,14 +609,14 @@ void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& resu
{
result.clear();
- int startPos = 0;
- int endPos;
- while ((endPos = find(separator, startPos)) != -1) {
+ unsigned startPos = 0;
+ size_t endPos;
+ while ((endPos = find(separator, startPos)) != notFound) {
if (allowEmptyEntries || startPos != endPos)
result.append(substring(startPos, endPos - startPos));
startPos = endPos + 1;
}
- if (allowEmptyEntries || startPos != static_cast<int>(length()))
+ if (allowEmptyEntries || startPos != length())
result.append(substring(startPos));
}
@@ -593,18 +625,23 @@ void String::split(UChar separator, Vector<String>& result) const
return split(String(&separator, 1), false, result);
}
-Vector<char> String::ascii() const
+CString String::ascii() const
{
- if (m_impl)
- return m_impl->ascii();
-
- const char* nullMsg = "(null impl)";
- Vector<char, 2048> buffer;
- for (int i = 0; nullMsg[i]; ++i)
- buffer.append(nullMsg[i]);
-
- buffer.append('\0');
- return buffer;
+ // Printable ASCII characters 32..127 and the null character are
+ // preserved, characters outside of this range are converted to '?'.
+
+ unsigned length = this->length();
+ const UChar* characters = this->characters();
+
+ char* characterBuffer;
+ CString result = CString::newUninitialized(length, characterBuffer);
+
+ for (unsigned i = 0; i < length; ++i) {
+ UChar ch = characters[i];
+ characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
+ }
+
+ return result;
}
CString String::latin1() const
@@ -620,7 +657,7 @@ CString String::latin1() const
for (unsigned i = 0; i < length; ++i) {
UChar ch = characters[i];
- characterBuffer[i] = ch > 255 ? '?' : ch;
+ characterBuffer[i] = ch > 0xff ? '?' : ch;
}
return result;
@@ -635,7 +672,7 @@ static inline void putUTF8Triple(char*& buffer, UChar ch)
*buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
}
-CString String::utf8() const
+CString String::utf8(bool strict) const
{
unsigned length = this->length();
const UChar* characters = this->characters();
@@ -653,15 +690,21 @@ CString String::utf8() const
Vector<char, 1024> bufferVector(length * 3);
char* buffer = bufferVector.data();
- ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), false);
- ASSERT(result != sourceIllegal); // Only produced from strict conversion.
+ ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
- // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate
- // would have been handled in the middle of a string with non-strict conversion - which is to say,
- // simply encode it to UTF-8.
+ // sourceIllegal is only produced from strict conversion.
+ if (result == sourceIllegal)
+ return CString();
+
+ // Check for an unconverted high surrogate.
if (result == sourceExhausted) {
- // This should be one unpaired high surrogate.
+ if (strict)
+ return CString();
+ // This should be one unpaired high surrogate. Treat it the same
+ // way as an unpaired high surrogate would have been handled in
+ // the middle of a string with non-strict conversion - which is
+ // to say, simply encode it to UTF-8.
ASSERT((characters + 1) == (this->characters() + length));
ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
// There should be room left, since one UChar hasn't been converted.