diff options
Diffstat (limited to 'libs')
-rw-r--r-- | libs/utils/ResourceTypes.cpp | 104 | ||||
-rw-r--r-- | libs/utils/String16.cpp | 58 | ||||
-rw-r--r-- | libs/utils/String8.cpp | 67 |
3 files changed, 183 insertions, 46 deletions
diff --git a/libs/utils/ResourceTypes.cpp b/libs/utils/ResourceTypes.cpp index 450af8d..afca814 100644 --- a/libs/utils/ResourceTypes.cpp +++ b/libs/utils/ResourceTypes.cpp @@ -229,12 +229,12 @@ Res_png_9patch* Res_png_9patch::deserialize(const void* inData) // -------------------------------------------------------------------- ResStringPool::ResStringPool() - : mError(NO_INIT), mOwnedData(NULL) + : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL) { } ResStringPool::ResStringPool(const void* data, size_t size, bool copyData) - : mError(NO_INIT), mOwnedData(NULL) + : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL) { setTo(data, size, copyData); } @@ -296,7 +296,17 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData) (int)size); return (mError=BAD_TYPE); } - mStrings = (const char16_t*) + + size_t charSize; + if (mHeader->flags&ResStringPool_header::UTF8_FLAG) { + charSize = sizeof(uint8_t); + mCache = (char16_t**)malloc(sizeof(char16_t**)*mHeader->stringCount); + memset(mCache, 0, sizeof(char16_t**)*mHeader->stringCount); + } else { + charSize = sizeof(char16_t); + } + + mStrings = (const void*) (((const uint8_t*)data)+mHeader->stringsStart); if (mHeader->stringsStart >= (mHeader->header.size-sizeof(uint16_t))) { LOGW("Bad string block: string pool starts at %d, after total size %d\n", @@ -305,7 +315,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData) } if (mHeader->styleCount == 0) { mStringPoolSize = - (mHeader->header.size-mHeader->stringsStart)/sizeof(uint16_t); + (mHeader->header.size-mHeader->stringsStart)/charSize; } else { // check invariant: styles follow the strings if (mHeader->stylesStart <= mHeader->stringsStart) { @@ -314,7 +324,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData) return (mError=BAD_TYPE); } mStringPoolSize = - (mHeader->stylesStart-mHeader->stringsStart)/sizeof(uint16_t); + (mHeader->stylesStart-mHeader->stringsStart)/charSize; } // check invariant: stringCount > 0 requires a string pool to exist @@ -329,13 +339,19 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData) for (i=0; i<mHeader->stringCount; i++) { e[i] = dtohl(mEntries[i]); } - char16_t* s = const_cast<char16_t*>(mStrings); - for (i=0; i<mStringPoolSize; i++) { - s[i] = dtohs(mStrings[i]); + if (!(mHeader->flags&ResStringPool_header::UTF8_FLAG)) { + const char16_t* strings = (const char16_t*)mStrings; + char16_t* s = const_cast<char16_t*>(strings); + for (i=0; i<mStringPoolSize; i++) { + s[i] = dtohs(strings[i]); + } } } - if (mStrings[mStringPoolSize-1] != 0) { + if ((mHeader->flags&ResStringPool_header::UTF8_FLAG && + ((uint8_t*)mStrings)[mStringPoolSize-1] != 0) || + (!mHeader->flags&ResStringPool_header::UTF8_FLAG && + ((char16_t*)mStrings)[mStringPoolSize-1] != 0)) { LOGW("Bad string block: last string is not 0-terminated\n"); return (mError=BAD_TYPE); } @@ -410,24 +426,67 @@ void ResStringPool::uninit() free(mOwnedData); mOwnedData = NULL; } + if (mHeader != NULL && mCache != NULL) { + for (size_t x = 0; x < mHeader->stringCount; x++) { + if (mCache[x] != NULL) { + free(mCache[x]); + mCache[x] = NULL; + } + } + free(mCache); + mCache = NULL; + } } +#define DECODE_LENGTH(str, chrsz, len) \ + len = *(str); \ + if (*(str)&(1<<(chrsz*8-1))) { \ + (str)++; \ + len = (((len)&((1<<(chrsz*8-1))-1))<<(chrsz*8)) + *(str); \ + } \ + (str)++; + const uint16_t* ResStringPool::stringAt(size_t idx, size_t* outLen) const { if (mError == NO_ERROR && idx < mHeader->stringCount) { - const uint32_t off = (mEntries[idx]/sizeof(uint16_t)); + const bool isUTF8 = (mHeader->flags&ResStringPool_header::UTF8_FLAG) != 0; + const uint32_t off = mEntries[idx]/(isUTF8?sizeof(char):sizeof(char16_t)); if (off < (mStringPoolSize-1)) { - const char16_t* str = mStrings+off; - *outLen = *str; - if ((*str)&0x8000) { - str++; - *outLen = (((*outLen)&0x7fff)<<16) + *str; - } - if ((uint32_t)(str+1+*outLen-mStrings) < mStringPoolSize) { - return str+1; + if (!isUTF8) { + const char16_t* strings = (char16_t*)mStrings; + const char16_t* str = strings+off; + DECODE_LENGTH(str, sizeof(char16_t), *outLen) + if ((uint32_t)(str+*outLen-strings) < mStringPoolSize) { + return str; + } else { + LOGW("Bad string block: string #%d extends to %d, past end at %d\n", + (int)idx, (int)(str+*outLen-strings), (int)mStringPoolSize); + } } else { - LOGW("Bad string block: string #%d extends to %d, past end at %d\n", - (int)idx, (int)(str+1+*outLen-mStrings), (int)mStringPoolSize); + const uint8_t* strings = (uint8_t*)mStrings; + const uint8_t* str = strings+off; + DECODE_LENGTH(str, sizeof(uint8_t), *outLen) + size_t encLen; + DECODE_LENGTH(str, sizeof(uint8_t), encLen) + if ((uint32_t)(str+encLen-strings) < mStringPoolSize) { + AutoMutex lock(mDecodeLock); + if (mCache[idx] != NULL) { + return mCache[idx]; + } + char16_t *u16str = (char16_t *)calloc(*outLen+1, sizeof(char16_t)); + if (!u16str) { + LOGW("No memory when trying to allocate decode cache for string #%d\n", + (int)idx); + return NULL; + } + const unsigned char *u8src = reinterpret_cast<const unsigned char *>(str); + utf8_to_utf16(u8src, encLen, u16str, *outLen); + mCache[idx] = u16str; + return u16str; + } else { + LOGW("Bad string block: string #%d extends to %d, past end at %d\n", + (int)idx, (int)(str+encLen-strings), (int)mStringPoolSize); + } } } else { LOGW("Bad string block: string #%d entry is at %d, past end at %d\n", @@ -466,6 +525,10 @@ ssize_t ResStringPool::indexOfString(const char16_t* str, size_t strLen) const size_t len; + // TODO optimize searching for UTF-8 strings taking into account + // the cache fill to determine when to convert the searched-for + // string key to UTF-8. + if (mHeader->flags&ResStringPool_header::SORTED_FLAG) { // Do a binary search for the string... ssize_t l = 0; @@ -1043,6 +1106,7 @@ status_t ResXMLTree::getError() const void ResXMLTree::uninit() { mError = NO_INIT; + mStrings.uninit(); if (mOwnedData) { free(mOwnedData); mOwnedData = NULL; diff --git a/libs/utils/String16.cpp b/libs/utils/String16.cpp index aef67f2..eab7b2b 100644 --- a/libs/utils/String16.cpp +++ b/libs/utils/String16.cpp @@ -172,10 +172,6 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2 : 0); } -// --------------------------------------------------------------------------- - -namespace android { - static inline size_t utf8_char_len(uint8_t ch) { @@ -215,8 +211,38 @@ utf8_to_utf32(const uint8_t *src, size_t length) //printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result); } +void +utf8_to_utf16(const uint8_t *src, size_t srcLen, + char16_t* dst, const size_t dstLen) +{ + const uint8_t* const end = src + srcLen; + const char16_t* const dstEnd = dst + dstLen; + while (src < end && dst < dstEnd) { + size_t len = utf8_char_len(*src); + uint32_t codepoint = utf8_to_utf32((const uint8_t*)src, len); + + // Convert the UTF32 codepoint to one or more UTF16 codepoints + if (codepoint <= 0xFFFF) { + // Single UTF16 character + *dst++ = (char16_t) codepoint; + } else { + // Multiple UTF16 characters with surrogates + codepoint = codepoint - 0x10000; + *dst++ = (char16_t) ((codepoint >> 10) + 0xD800); + *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00); + } + + src += len; + } + if (dst < dstEnd) { + *dst = 0; + } +} + // --------------------------------------------------------------------------- +namespace android { + static SharedBuffer* gEmptyStringBuf = NULL; static char16_t* gEmptyString = NULL; @@ -260,30 +286,14 @@ static char16_t* allocFromUTF8(const char* in, size_t len) p += utf8len; } - SharedBuffer* buf = SharedBuffer::alloc((chars+1)*sizeof(char16_t)); + size_t bufSize = (chars+1)*sizeof(char16_t); + SharedBuffer* buf = SharedBuffer::alloc(bufSize); if (buf) { p = in; char16_t* str = (char16_t*)buf->data(); - char16_t* d = str; - while (p < end) { - size_t len = utf8_char_len(*p); - uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, len); - - // Convert the UTF32 codepoint to one or more UTF16 codepoints - if (codepoint <= 0xFFFF) { - // Single UTF16 character - *d++ = (char16_t) codepoint; - } else { - // Multiple UTF16 characters with surrogates - codepoint = codepoint - 0x10000; - *d++ = (char16_t) ((codepoint >> 10) + 0xD800); - *d++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00); - } - - p += len; - } - *d = 0; + utf8_to_utf16((const uint8_t*)p, len, str, bufSize); + //printf("Created UTF-16 string from UTF-8 \"%s\":", in); //printHexData(1, str, buf->size(), 16, 1); //printf("\n"); diff --git a/libs/utils/String8.cpp b/libs/utils/String8.cpp index e908ec1..3a34838 100644 --- a/libs/utils/String8.cpp +++ b/libs/utils/String8.cpp @@ -208,10 +208,23 @@ static char* allocFromUTF16OrUTF32(const T* in, L len) return getEmptyString(); } -// Note: not dealing with expanding surrogate pairs. static char* allocFromUTF16(const char16_t* in, size_t len) { - return allocFromUTF16OrUTF32<char16_t, size_t>(in, len); + if (len == 0) return getEmptyString(); + + const size_t bytes = utf8_length_from_utf16(in, len); + + SharedBuffer* buf = SharedBuffer::alloc(bytes+1); + LOG_ASSERT(buf, "Unable to allocate shared buffer"); + if (buf) { + char* str = (char*)buf->data(); + + utf16_to_utf8(in, len, str, bytes+1); + + return str; + } + + return getEmptyString(); } static char* allocFromUTF32(const char32_t* in, size_t len) @@ -762,6 +775,26 @@ size_t utf8_length_from_utf32(const char32_t *src, size_t src_len) return ret; } +size_t utf8_length_from_utf16(const char16_t *src, size_t src_len) +{ + if (src == NULL || src_len == 0) { + return 0; + } + size_t ret = 0; + const char16_t* const end = src + src_len; + while (src < end) { + if ((*src & 0xFC00) == 0xD800 && (src + 1) < end + && (*++src & 0xFC00) == 0xDC00) { + // surrogate pairs are always 4 bytes. + ret += 4; + src++; + } else { + ret += android::utf32_to_utf8_bytes((char32_t) *src++); + } + } + return ret; +} + static int32_t utf32_at_internal(const char* cur, size_t *num_read) { const char first_char = *cur; @@ -848,3 +881,33 @@ size_t utf32_to_utf8(const char32_t* src, size_t src_len, } return cur - dst; } + +size_t utf16_to_utf8(const char16_t* src, size_t src_len, + char* dst, size_t dst_len) +{ + if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) { + return 0; + } + const char16_t* cur_utf16 = src; + const char16_t* const end_utf16 = src + src_len; + char *cur = dst; + const char* const end = dst + dst_len; + while (cur_utf16 < end_utf16 && cur < end) { + char32_t utf32; + // surrogate pairs + if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) { + utf32 = (*cur_utf16++ - 0xD800) << 10; + utf32 |= *cur_utf16++ - 0xDC00; + utf32 += 0x10000; + } else { + utf32 = (char32_t) *cur_utf16++; + } + size_t len = android::utf32_to_utf8_bytes(utf32); + android::utf32_to_utf8((uint8_t*)cur, utf32, len); + cur += len; + } + if (cur < end) { + *cur = '\0'; + } + return cur - dst; +} |