summary | refs | log | tree | commit | diff | stats
path: root/libs
diff options
context:
space:
mode:
Diffstat (limited to 'libs')
-rw-r--r-- libs/utils/ResourceTypes.cpp 104
-rw-r--r-- libs/utils/String16.cpp 58
-rw-r--r-- libs/utils/String8.cpp 67
3 files changed, 183 insertions, 46 deletions
diff --git a/libs/utils/ResourceTypes.cpp b/libs/utils/ResourceTypes.cpp
index 450af8d..afca814 100644
--- a/libs/utils/ResourceTypes.cpp
+++ b/libs/utils/ResourceTypes.cpp
@@ -229,12 +229,12 @@ Res_png_9patch* Res_png_9patch::deserialize(const void* inData)
// --------------------------------------------------------------------
// Default constructor: the pool starts in the NO_INIT error state with no
// owned data. The change shown here additionally initializes mHeader and
// mCache to NULL.
ResStringPool::ResStringPool()
- : mError(NO_INIT), mOwnedData(NULL)
+ : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
{
}
// Constructs a pool and immediately hands (data, size, copyData) to setTo().
// Members start in the same NO_INIT / NULL state as the default constructor;
// the change shown here additionally initializes mHeader and mCache to NULL.
ResStringPool::ResStringPool(const void* data, size_t size, bool copyData)
- : mError(NO_INIT), mOwnedData(NULL)
+ : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
{
setTo(data, size, copyData);
}
@@ -296,7 +296,17 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
(int)size);
return (mError=BAD_TYPE);
}
- mStrings = (const char16_t*)
+
+ size_t charSize;
+ if (mHeader->flags&ResStringPool_header::UTF8_FLAG) {
+ charSize = sizeof(uint8_t);
+ mCache = (char16_t**)malloc(sizeof(char16_t**)*mHeader->stringCount);
+ memset(mCache, 0, sizeof(char16_t**)*mHeader->stringCount);
+ } else {
+ charSize = sizeof(char16_t);
+ }
+
+ mStrings = (const void*)
(((const uint8_t*)data)+mHeader->stringsStart);
if (mHeader->stringsStart >= (mHeader->header.size-sizeof(uint16_t))) {
LOGW("Bad string block: string pool starts at %d, after total size %d\n",
@@ -305,7 +315,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
}
if (mHeader->styleCount == 0) {
mStringPoolSize =
- (mHeader->header.size-mHeader->stringsStart)/sizeof(uint16_t);
+ (mHeader->header.size-mHeader->stringsStart)/charSize;
} else {
// check invariant: styles follow the strings
if (mHeader->stylesStart <= mHeader->stringsStart) {
@@ -314,7 +324,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
return (mError=BAD_TYPE);
}
mStringPoolSize =
- (mHeader->stylesStart-mHeader->stringsStart)/sizeof(uint16_t);
+ (mHeader->stylesStart-mHeader->stringsStart)/charSize;
}
// check invariant: stringCount > 0 requires a string pool to exist
@@ -329,13 +339,19 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
for (i=0; i<mHeader->stringCount; i++) {
e[i] = dtohl(mEntries[i]);
}
- char16_t* s = const_cast<char16_t*>(mStrings);
- for (i=0; i<mStringPoolSize; i++) {
- s[i] = dtohs(mStrings[i]);
+ if (!(mHeader->flags&ResStringPool_header::UTF8_FLAG)) {
+ const char16_t* strings = (const char16_t*)mStrings;
+ char16_t* s = const_cast<char16_t*>(strings);
+ for (i=0; i<mStringPoolSize; i++) {
+ s[i] = dtohs(strings[i]);
+ }
}
}
- if (mStrings[mStringPoolSize-1] != 0) {
+ if ((mHeader->flags&ResStringPool_header::UTF8_FLAG &&
+ ((uint8_t*)mStrings)[mStringPoolSize-1] != 0) ||
+ (!mHeader->flags&ResStringPool_header::UTF8_FLAG &&
+ ((char16_t*)mStrings)[mStringPoolSize-1] != 0)) {
LOGW("Bad string block: last string is not 0-terminated\n");
return (mError=BAD_TYPE);
}
@@ -410,24 +426,67 @@ void ResStringPool::uninit()
free(mOwnedData);
mOwnedData = NULL;
}
+ if (mHeader != NULL && mCache != NULL) {
+ for (size_t x = 0; x < mHeader->stringCount; x++) {
+ if (mCache[x] != NULL) {
+ free(mCache[x]);
+ mCache[x] = NULL;
+ }
+ }
+ free(mCache);
+ mCache = NULL;
+ }
}
+// Decodes a length prefix stored in one or two units of `chrsz` bytes each.
+//
+//   str   - pointer lvalue to the first length unit; on exit it points just
+//           past the length prefix (it is advanced by one or two units).
+//   chrsz - size in bytes of one unit (e.g. sizeof(uint8_t)).
+//   len   - lvalue that receives the decoded length.
+//
+// If the top bit of the first unit is set, its remaining bits are the high
+// part of the length and the following unit is the low part; otherwise the
+// first unit alone is the length.
+//
+// The expansion is a single brace-enclosed block so that the macro stays one
+// statement under an unbraced `if`, and it remains valid at call sites that
+// do not put a semicolon after the invocation. All arguments are
+// parenthesized so that expression arguments expand safely. The macro does
+// no bounds checking; callers must validate the resulting pointer/length.
+#define DECODE_LENGTH(str, chrsz, len) \
+    { \
+        (len) = *(str); \
+        if (*(str)&(1<<((chrsz)*8-1))) { \
+            (str)++; \
+            (len) = (((len)&((1<<((chrsz)*8-1))-1))<<((chrsz)*8)) + *(str); \
+        } \
+        (str)++; \
+    }
+
const uint16_t* ResStringPool::stringAt(size_t idx, size_t* outLen) const
{
if (mError == NO_ERROR && idx < mHeader->stringCount) {
- const uint32_t off = (mEntries[idx]/sizeof(uint16_t));
+ const bool isUTF8 = (mHeader->flags&ResStringPool_header::UTF8_FLAG) != 0;
+ const uint32_t off = mEntries[idx]/(isUTF8?sizeof(char):sizeof(char16_t));
if (off < (mStringPoolSize-1)) {
- const char16_t* str = mStrings+off;
- *outLen = *str;
- if ((*str)&0x8000) {
- str++;
- *outLen = (((*outLen)&0x7fff)<<16) + *str;
- }
- if ((uint32_t)(str+1+*outLen-mStrings) < mStringPoolSize) {
- return str+1;
+ if (!isUTF8) {
+ const char16_t* strings = (char16_t*)mStrings;
+ const char16_t* str = strings+off;
+ DECODE_LENGTH(str, sizeof(char16_t), *outLen)
+ if ((uint32_t)(str+*outLen-strings) < mStringPoolSize) {
+ return str;
+ } else {
+ LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
+ (int)idx, (int)(str+*outLen-strings), (int)mStringPoolSize);
+ }
} else {
- LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
- (int)idx, (int)(str+1+*outLen-mStrings), (int)mStringPoolSize);
+ const uint8_t* strings = (uint8_t*)mStrings;
+ const uint8_t* str = strings+off;
+ DECODE_LENGTH(str, sizeof(uint8_t), *outLen)
+ size_t encLen;
+ DECODE_LENGTH(str, sizeof(uint8_t), encLen)
+ if ((uint32_t)(str+encLen-strings) < mStringPoolSize) {
+ AutoMutex lock(mDecodeLock);
+ if (mCache[idx] != NULL) {
+ return mCache[idx];
+ }
+ char16_t *u16str = (char16_t *)calloc(*outLen+1, sizeof(char16_t));
+ if (!u16str) {
+ LOGW("No memory when trying to allocate decode cache for string #%d\n",
+ (int)idx);
+ return NULL;
+ }
+ const unsigned char *u8src = reinterpret_cast<const unsigned char *>(str);
+ utf8_to_utf16(u8src, encLen, u16str, *outLen);
+ mCache[idx] = u16str;
+ return u16str;
+ } else {
+ LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
+ (int)idx, (int)(str+encLen-strings), (int)mStringPoolSize);
+ }
}
} else {
LOGW("Bad string block: string #%d entry is at %d, past end at %d\n",
@@ -466,6 +525,10 @@ ssize_t ResStringPool::indexOfString(const char16_t* str, size_t strLen) const
size_t len;
+ // TODO optimize searching for UTF-8 strings taking into account
+ // the cache fill to determine when to convert the searched-for
+ // string key to UTF-8.
+
if (mHeader->flags&ResStringPool_header::SORTED_FLAG) {
// Do a binary search for the string...
ssize_t l = 0;
@@ -1043,6 +1106,7 @@ status_t ResXMLTree::getError() const
void ResXMLTree::uninit()
{
mError = NO_INIT;
+ mStrings.uninit();
if (mOwnedData) {
free(mOwnedData);
mOwnedData = NULL;
diff --git a/libs/utils/String16.cpp b/libs/utils/String16.cpp
index aef67f2..eab7b2b 100644
--- a/libs/utils/String16.cpp
+++ b/libs/utils/String16.cpp
@@ -172,10 +172,6 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
: 0);
}
-// ---------------------------------------------------------------------------
-
-namespace android {
-
static inline size_t
utf8_char_len(uint8_t ch)
{
@@ -215,8 +211,38 @@ utf8_to_utf32(const uint8_t *src, size_t length)
//printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result);
}
+// Converts the UTF-8 bytes in [src, src + srcLen) to UTF-16 in dst.
+//
+//   src    - UTF-8 input bytes.
+//   srcLen - number of input bytes.
+//   dst    - output buffer.
+//   dstLen - capacity of dst in char16_t units (not bytes).
+//
+// Conversion stops when the input is exhausted, when the next code point
+// would not fit in the remaining output space, or when the final UTF-8
+// sequence is truncated. A terminating 0 is written only if at least one
+// output unit is still free afterwards.
+void
+utf8_to_utf16(const uint8_t *src, size_t srcLen,
+        char16_t* dst, const size_t dstLen)
+{
+    const uint8_t* const end = src + srcLen;
+    const char16_t* const dstEnd = dst + dstLen;
+    while (src < end && dst < dstEnd) {
+        const size_t len = utf8_char_len(*src);
+        // Never read past the end of the input: a lead byte may announce
+        // more continuation bytes than are actually left. The zero check is
+        // defensive so the loop always makes progress.
+        if (len == 0 || len > (size_t)(end - src)) {
+            break;
+        }
+        uint32_t codepoint = utf8_to_utf32((const uint8_t*)src, len);
+
+        // Convert the UTF32 codepoint to one or more UTF16 codepoints
+        if (codepoint <= 0xFFFF) {
+            // Single UTF16 character
+            *dst++ = (char16_t) codepoint;
+        } else {
+            // A surrogate pair needs two output units; the loop condition
+            // only guarantees one, so check before writing either half.
+            if (dstEnd - dst < 2) {
+                break;
+            }
+            // Multiple UTF16 characters with surrogates
+            codepoint = codepoint - 0x10000;
+            *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
+            *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
+        }
+
+        src += len;
+    }
+    if (dst < dstEnd) {
+        *dst = 0;
+    }
+}
+
// ---------------------------------------------------------------------------
+namespace android {
+
static SharedBuffer* gEmptyStringBuf = NULL;
static char16_t* gEmptyString = NULL;
@@ -260,30 +286,14 @@ static char16_t* allocFromUTF8(const char* in, size_t len)
p += utf8len;
}
- SharedBuffer* buf = SharedBuffer::alloc((chars+1)*sizeof(char16_t));
+ size_t bufSize = (chars+1)*sizeof(char16_t);
+ SharedBuffer* buf = SharedBuffer::alloc(bufSize);
if (buf) {
p = in;
char16_t* str = (char16_t*)buf->data();
- char16_t* d = str;
- while (p < end) {
- size_t len = utf8_char_len(*p);
- uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, len);
-
- // Convert the UTF32 codepoint to one or more UTF16 codepoints
- if (codepoint <= 0xFFFF) {
- // Single UTF16 character
- *d++ = (char16_t) codepoint;
- } else {
- // Multiple UTF16 characters with surrogates
- codepoint = codepoint - 0x10000;
- *d++ = (char16_t) ((codepoint >> 10) + 0xD800);
- *d++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
- }
-
- p += len;
- }
- *d = 0;
+ utf8_to_utf16((const uint8_t*)p, len, str, bufSize);
+
//printf("Created UTF-16 string from UTF-8 \"%s\":", in);
//printHexData(1, str, buf->size(), 16, 1);
//printf("\n");
diff --git a/libs/utils/String8.cpp b/libs/utils/String8.cpp
index e908ec1..3a34838 100644
--- a/libs/utils/String8.cpp
+++ b/libs/utils/String8.cpp
@@ -208,10 +208,23 @@ static char* allocFromUTF16OrUTF32(const T* in, L len)
return getEmptyString();
}
-// Note: not dealing with expanding surrogate pairs.
// Returns a UTF-8 copy of the UTF-16 string `in` (`len` units), stored in a
// newly allocated SharedBuffer. Returns getEmptyString() when len is 0 or
// when the buffer cannot be allocated.
static char* allocFromUTF16(const char16_t* in, size_t len)
{
- return allocFromUTF16OrUTF32<char16_t, size_t>(in, len);
+ if (len == 0) return getEmptyString();
+
// Size the buffer from the encoded length rather than the unit count.
+ const size_t bytes = utf8_length_from_utf16(in, len);
+
// One extra byte so utf16_to_utf8 has room to append the terminating NUL.
+ SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
+ LOG_ASSERT(buf, "Unable to allocate shared buffer");
+ if (buf) {
+ char* str = (char*)buf->data();
+
+ utf16_to_utf8(in, len, str, bytes+1);
+
+ return str;
+ }
+
+ return getEmptyString();
}
static char* allocFromUTF32(const char32_t* in, size_t len)
@@ -762,6 +775,26 @@ size_t utf8_length_from_utf32(const char32_t *src, size_t src_len)
return ret;
}
+// Returns the number of UTF-8 bytes needed to encode the UTF-16 string
+// [src, src + src_len), not counting any terminator.
+//
+//   src     - UTF-16 input; NULL yields 0.
+//   src_len - number of char16_t units; 0 yields 0.
+//
+// A high surrogate immediately followed by a low surrogate is counted as one
+// 4-byte sequence. Any other unit, including an unpaired surrogate, is
+// counted on its own via utf32_to_utf8_bytes().
+size_t utf8_length_from_utf16(const char16_t *src, size_t src_len)
+{
+    if (src == NULL || src_len == 0) {
+        return 0;
+    }
+    size_t ret = 0;
+    const char16_t* const end = src + src_len;
+    while (src < end) {
+        // Peek at the following unit without advancing: if it turns out not
+        // to be a low surrogate, the current unit must still be counted by
+        // the else branch instead of being silently skipped.
+        if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
+                && (*(src + 1) & 0xFC00) == 0xDC00) {
+            // surrogate pairs are always 4 bytes.
+            ret += 4;
+            src += 2;
+        } else {
+            ret += android::utf32_to_utf8_bytes((char32_t) *src++);
+        }
+    }
+    return ret;
+}
+
static int32_t utf32_at_internal(const char* cur, size_t *num_read)
{
const char first_char = *cur;
@@ -848,3 +881,33 @@ size_t utf32_to_utf8(const char32_t* src, size_t src_len,
}
return cur - dst;
}
+
+// Converts the UTF-16 string [src, src + src_len) to UTF-8 in dst.
+//
+//   src     - UTF-16 input.
+//   src_len - number of char16_t units in src.
+//   dst     - output buffer.
+//   dst_len - capacity of dst in bytes.
+//
+// Returns the number of bytes written, not counting the terminator. Returns
+// 0 without touching dst if either pointer is NULL or either length is 0.
+// A terminating '\0' is appended only if a byte is still free after the
+// conversion. Conversion stops early, rather than overrunning dst, when the
+// next encoded character does not fit in the remaining space.
+size_t utf16_to_utf8(const char16_t* src, size_t src_len,
+                     char* dst, size_t dst_len)
+{
+    if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
+        return 0;
+    }
+    const char16_t* cur_utf16 = src;
+    const char16_t* const end_utf16 = src + src_len;
+    char *cur = dst;
+    const char* const end = dst + dst_len;
+    while (cur_utf16 < end_utf16 && cur < end) {
+        char32_t utf32;
+        // surrogate pairs: only combine when the next unit really is a low
+        // surrogate; an unpaired high surrogate is passed through on its own.
+        if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16
+                && (*(cur_utf16 + 1) & 0xFC00) == 0xDC00) {
+            utf32 = (*cur_utf16++ - 0xD800) << 10;
+            utf32 |= *cur_utf16++ - 0xDC00;
+            utf32 += 0x10000;
+        } else {
+            utf32 = (char32_t) *cur_utf16++;
+        }
+        const size_t len = android::utf32_to_utf8_bytes(utf32);
+        // The loop condition only guarantees one free byte, but an encoded
+        // character can need several; stop before writing past the buffer.
+        if (len > (size_t)(end - cur)) {
+            break;
+        }
+        android::utf32_to_utf8((uint8_t*)cur, utf32, len);
+        cur += len;
+    }
+    if (cur < end) {
+        *cur = '\0';
+    }
+    return cur - dst;
+}