diff options
Diffstat (limited to 'libs/utils/String8.cpp')
-rw-r--r-- | libs/utils/String8.cpp | 414 |
1 files changed, 45 insertions, 369 deletions
diff --git a/libs/utils/String8.cpp b/libs/utils/String8.cpp index 1c4f80c..e531a2a 100644 --- a/libs/utils/String8.cpp +++ b/libs/utils/String8.cpp @@ -17,6 +17,8 @@ #include <utils/String8.h> #include <utils/Log.h> +#include <utils/Unicode.h> +#include <utils/SharedBuffer.h> #include <utils/String16.h> #include <utils/TextOutput.h> #include <utils/threads.h> @@ -34,94 +36,10 @@ namespace android { -static const char32_t kByteMask = 0x000000BF; -static const char32_t kByteMark = 0x00000080; - -// Surrogates aren't valid for UTF-32 characters, so define some -// constants that will let us screen them out. -static const char32_t kUnicodeSurrogateHighStart = 0x0000D800; -static const char32_t kUnicodeSurrogateHighEnd = 0x0000DBFF; -static const char32_t kUnicodeSurrogateLowStart = 0x0000DC00; -static const char32_t kUnicodeSurrogateLowEnd = 0x0000DFFF; -static const char32_t kUnicodeSurrogateStart = kUnicodeSurrogateHighStart; -static const char32_t kUnicodeSurrogateEnd = kUnicodeSurrogateLowEnd; -static const char32_t kUnicodeMaxCodepoint = 0x0010FFFF; - -// Mask used to set appropriate bits in first byte of UTF-8 sequence, -// indexed by number of bytes in the sequence. -// 0xxxxxxx -// -> (00-7f) 7bit. Bit mask for the first byte is 0x00000000 -// 110yyyyx 10xxxxxx -// -> (c0-df)(80-bf) 11bit. Bit mask is 0x000000C0 -// 1110yyyy 10yxxxxx 10xxxxxx -// -> (e0-ef)(80-bf)(80-bf) 16bit. Bit mask is 0x000000E0 -// 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx -// -> (f0-f7)(80-bf)(80-bf)(80-bf) 21bit. Bit mask is 0x000000F0 -static const char32_t kFirstByteMark[] = { - 0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0 -}; - // Separator used by resource paths. This is not platform dependent contrary // to OS_PATH_SEPARATOR. #define RES_PATH_SEPARATOR '/' -// Return number of utf8 bytes required for the character. -static size_t utf32_to_utf8_bytes(char32_t srcChar) -{ - size_t bytesToWrite; - - // Figure out how many bytes the result will require. - if (srcChar < 0x00000080) - { - bytesToWrite = 1; - } - else if (srcChar < 0x00000800) - { - bytesToWrite = 2; - } - else if (srcChar < 0x00010000) - { - if ((srcChar < kUnicodeSurrogateStart) - || (srcChar > kUnicodeSurrogateEnd)) - { - bytesToWrite = 3; - } - else - { - // Surrogates are invalid UTF-32 characters. - return 0; - } - } - // Max code point for Unicode is 0x0010FFFF. - else if (srcChar <= kUnicodeMaxCodepoint) - { - bytesToWrite = 4; - } - else - { - // Invalid UTF-32 character. - return 0; - } - - return bytesToWrite; -} - -// Write out the source character to <dstP>. - -static void utf32_to_utf8(uint8_t* dstP, char32_t srcChar, size_t bytes) -{ - dstP += bytes; - switch (bytes) - { /* note: everything falls through. */ - case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; - case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; - case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; - case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]); - } -} - -// --------------------------------------------------------------------------- - static SharedBuffer* gEmptyStringBuf = NULL; static char* gEmptyString = NULL; @@ -175,62 +93,47 @@ static char* allocFromUTF8(const char* in, size_t len) return getEmptyString(); } -template<typename T, typename L> -static char* allocFromUTF16OrUTF32(const T* in, L len) +static char* allocFromUTF16(const char16_t* in, size_t len) { if (len == 0) return getEmptyString(); - size_t bytes = 0; - const T* end = in+len; - const T* p = in; - - while (p < end) { - bytes += utf32_to_utf8_bytes(*p); - p++; + const ssize_t bytes = utf16_to_utf8_length(in, len); + if (bytes < 0) { + return getEmptyString(); } SharedBuffer* buf = SharedBuffer::alloc(bytes+1); LOG_ASSERT(buf, "Unable to allocate shared buffer"); - if (buf) { - p = in; - char* str = (char*)buf->data(); - char* d = str; - while (p < end) { - const T c = *p++; - size_t len = utf32_to_utf8_bytes(c); - utf32_to_utf8((uint8_t*)d, c, len); - d += len; - } - *d = 0; - - return str; + if (!buf) { + return getEmptyString(); } - return getEmptyString(); + char* str = (char*)buf->data(); + utf16_to_utf8(in, len, str); + return str; } -static char* allocFromUTF16(const char16_t* in, size_t len) +static char* allocFromUTF32(const char32_t* in, size_t len) { - if (len == 0) return getEmptyString(); + if (len == 0) { + return getEmptyString(); + } - const size_t bytes = utf8_length_from_utf16(in, len); + const ssize_t bytes = utf32_to_utf8_length(in, len); + if (bytes < 0) { + return getEmptyString(); + } SharedBuffer* buf = SharedBuffer::alloc(bytes+1); LOG_ASSERT(buf, "Unable to allocate shared buffer"); - if (buf) { - char* str = (char*)buf->data(); - - utf16_to_utf8(in, len, str, bytes+1); - - return str; + if (!buf) { + return getEmptyString(); } - return getEmptyString(); -} + char* str = (char*) buf->data(); + utf32_to_utf8(in, len, str); -static char* allocFromUTF32(const char32_t* in, size_t len) -{ - return allocFromUTF16OrUTF32<char32_t, size_t>(in, len); + return str; } // --------------------------------------------------------------------------- @@ -292,6 +195,11 @@ String8::~String8() SharedBuffer::bufferFromData(mString)->release(); } +void String8::clear() { + SharedBuffer::bufferFromData(mString)->release(); + mString = getEmptyString(); +} + void String8::setTo(const String8& other) { SharedBuffer::bufferFromData(other.mString)->acquire(); @@ -374,22 +282,28 @@ status_t String8::append(const char* other, size_t otherLen) status_t String8::appendFormat(const char* fmt, ...) { - va_list ap; - va_start(ap, fmt); + va_list args; + va_start(args, fmt); + status_t result = appendFormatV(fmt, args); + + va_end(args); + return result; +} + +status_t String8::appendFormatV(const char* fmt, va_list args) +{ int result = NO_ERROR; - int n = vsnprintf(NULL, 0, fmt, ap); + int n = vsnprintf(NULL, 0, fmt, args); if (n != 0) { size_t oldLength = length(); char* buf = lockBuffer(oldLength + n); if (buf) { - vsnprintf(buf + oldLength, n + 1, fmt, ap); + vsnprintf(buf + oldLength, n + 1, fmt, args); } else { result = NO_MEMORY; } } - - va_end(ap); return result; } @@ -505,17 +419,17 @@ void String8::toUpper(size_t start, size_t length) size_t String8::getUtf32Length() const { - return utf32_length(mString, length()); + return utf8_to_utf32_length(mString, length()); } int32_t String8::getUtf32At(size_t index, size_t *next_index) const { - return utf32_at(mString, length(), index, next_index); + return utf32_from_utf8_at(mString, length(), index, next_index); } -size_t String8::getUtf32(char32_t* dst, size_t dst_len) const +void String8::getUtf32(char32_t* dst) const { - return utf8_to_utf32(mString, length(), dst, dst_len); + utf8_to_utf32(mString, length(), dst); } TextOutput& operator<<(TextOutput& to, const String8& val) @@ -700,241 +614,3 @@ String8& String8::convertToResPath() } }; // namespace android - -// --------------------------------------------------------------------------- - -size_t strlen32(const char32_t *s) -{ - const char32_t *ss = s; - while ( *ss ) - ss++; - return ss-s; -} - -size_t strnlen32(const char32_t *s, size_t maxlen) -{ - const char32_t *ss = s; - while ((maxlen > 0) && *ss) { - ss++; - maxlen--; - } - return ss-s; -} - -size_t utf8_length(const char *src) -{ - const char *cur = src; - size_t ret = 0; - while (*cur != '\0') { - const char first_char = *cur++; - if ((first_char & 0x80) == 0) { // ASCII - ret += 1; - continue; - } - // (UTF-8's character must not be like 10xxxxxx, - // but 110xxxxx, 1110xxxx, ... or 1111110x) - if ((first_char & 0x40) == 0) { - return 0; - } - - int32_t mask, to_ignore_mask; - size_t num_to_read = 0; - char32_t utf32 = 0; - for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80; - num_to_read < 5 && (first_char & mask); - num_to_read++, to_ignore_mask |= mask, mask >>= 1) { - if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx - return 0; - } - // 0x3F == 00111111 - utf32 = (utf32 << 6) + (*cur++ & 0x3F); - } - // "first_char" must be (110xxxxx - 11110xxx) - if (num_to_read == 5) { - return 0; - } - to_ignore_mask |= mask; - utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1)); - if (utf32 > android::kUnicodeMaxCodepoint) { - return 0; - } - - ret += num_to_read; - } - return ret; -} - -size_t utf32_length(const char *src, size_t src_len) -{ - if (src == NULL || src_len == 0) { - return 0; - } - size_t ret = 0; - const char* cur; - const char* end; - size_t num_to_skip; - for (cur = src, end = src + src_len, num_to_skip = 1; - cur < end; - cur += num_to_skip, ret++) { - const char first_char = *cur; - num_to_skip = 1; - if ((first_char & 0x80) == 0) { // ASCII - continue; - } - int32_t mask; - - for (mask = 0x40; (first_char & mask); num_to_skip++, mask >>= 1) { - } - } - return ret; -} - -size_t utf8_length_from_utf32(const char32_t *src, size_t src_len) -{ - if (src == NULL || src_len == 0) { - return 0; - } - size_t ret = 0; - const char32_t *end = src + src_len; - while (src < end) { - ret += android::utf32_to_utf8_bytes(*src++); - } - return ret; -} - -size_t utf8_length_from_utf16(const char16_t *src, size_t src_len) -{ - if (src == NULL || src_len == 0) { - return 0; - } - size_t ret = 0; - const char16_t* const end = src + src_len; - while (src < end) { - if ((*src & 0xFC00) == 0xD800 && (src + 1) < end - && (*++src & 0xFC00) == 0xDC00) { - // surrogate pairs are always 4 bytes. - ret += 4; - src++; - } else { - ret += android::utf32_to_utf8_bytes((char32_t) *src++); - } - } - return ret; -} - -static int32_t utf32_at_internal(const char* cur, size_t *num_read) -{ - const char first_char = *cur; - if ((first_char & 0x80) == 0) { // ASCII - *num_read = 1; - return *cur; - } - cur++; - char32_t mask, to_ignore_mask; - size_t num_to_read = 0; - char32_t utf32 = first_char; - for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0xFFFFFF80; - (first_char & mask); - num_to_read++, to_ignore_mask |= mask, mask >>= 1) { - // 0x3F == 00111111 - utf32 = (utf32 << 6) + (*cur++ & 0x3F); - } - to_ignore_mask |= mask; - utf32 &= ~(to_ignore_mask << (6 * (num_to_read - 1))); - - *num_read = num_to_read; - return static_cast<int32_t>(utf32); -} - -int32_t utf32_at(const char *src, size_t src_len, - size_t index, size_t *next_index) -{ - if (index >= src_len) { - return -1; - } - size_t dummy_index; - if (next_index == NULL) { - next_index = &dummy_index; - } - size_t num_read; - int32_t ret = utf32_at_internal(src + index, &num_read); - if (ret >= 0) { - *next_index = index + num_read; - } - - return ret; -} - -size_t utf8_to_utf32(const char* src, size_t src_len, - char32_t* dst, size_t dst_len) -{ - if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { - return 0; - } - - const char* cur = src; - const char* end = src + src_len; - char32_t* cur_utf32 = dst; - const char32_t* end_utf32 = dst + dst_len; - while (cur_utf32 < end_utf32 && cur < end) { - size_t num_read; - *cur_utf32++ = - static_cast<char32_t>(utf32_at_internal(cur, &num_read)); - cur += num_read; - } - if (cur_utf32 < end_utf32) { - *cur_utf32 = 0; - } - return static_cast<size_t>(cur_utf32 - dst); -} - -size_t utf32_to_utf8(const char32_t* src, size_t src_len, - char* dst, size_t dst_len) -{ - if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { - return 0; - } - const char32_t *cur_utf32 = src; - const char32_t *end_utf32 = src + src_len; - char *cur = dst; - const char *end = dst + dst_len; - while (cur_utf32 < end_utf32 && cur < end) { - size_t len = android::utf32_to_utf8_bytes(*cur_utf32); - android::utf32_to_utf8((uint8_t *)cur, *cur_utf32++, len); - cur += len; - } - if (cur < end) { - *cur = '\0'; - } - return cur - dst; -} - -size_t utf16_to_utf8(const char16_t* src, size_t src_len, - char* dst, size_t dst_len) -{ - if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { - return 0; - } - const char16_t* cur_utf16 = src; - const char16_t* const end_utf16 = src + src_len; - char *cur = dst; - const char* const end = dst + dst_len; - while (cur_utf16 < end_utf16 && cur < end) { - char32_t utf32; - // surrogate pairs - if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) { - utf32 = (*cur_utf16++ - 0xD800) << 10; - utf32 |= *cur_utf16++ - 0xDC00; - utf32 += 0x10000; - } else { - utf32 = (char32_t) *cur_utf16++; - } - size_t len = android::utf32_to_utf8_bytes(utf32); - android::utf32_to_utf8((uint8_t*)cur, utf32, len); - cur += len; - } - if (cur < end) { - *cur = '\0'; - } - return cur - dst; -} |