3 files changed, 183 insertions, 46 deletions
diff --git a/libs/utils/ResourceTypes.cpp b/libs/utils/ResourceTypes.cpp
index 450af8d..afca814 100644
--- a/libs/utils/ResourceTypes.cpp
+++ b/libs/utils/ResourceTypes.cpp
@@ -229,12 +229,12 @@ Res_png_9patch* Res_png_9patch::deserialize(const void* inData)
 // --------------------------------------------------------------------
 
 ResStringPool::ResStringPool()
-    : mError(NO_INIT), mOwnedData(NULL)
+    : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
 {
 }
 
 ResStringPool::ResStringPool(const void* data, size_t size, bool copyData)
-    : mError(NO_INIT), mOwnedData(NULL)
+    : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
 {
     setTo(data, size, copyData);
 }
@@ -296,7 +296,17 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
                     (int)size);
             return (mError=BAD_TYPE);
         }
-        mStrings = (const char16_t*)
+
+        size_t charSize;
+        if (mHeader->flags&ResStringPool_header::UTF8_FLAG) {
+            charSize = sizeof(uint8_t);
+            mCache = (char16_t**)malloc(sizeof(char16_t**)*mHeader->stringCount);
+            memset(mCache, 0, sizeof(char16_t**)*mHeader->stringCount);
+        } else {
+            charSize = sizeof(char16_t);
+        }
+
+        mStrings = (const void*)
             (((const uint8_t*)data)+mHeader->stringsStart);
         if (mHeader->stringsStart >= (mHeader->header.size-sizeof(uint16_t))) {
             LOGW("Bad string block: string pool starts at %d, after total size %d\n",
@@ -305,7 +315,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
         }
         if (mHeader->styleCount == 0) {
             mStringPoolSize =
-                (mHeader->header.size-mHeader->stringsStart)/sizeof(uint16_t);
+                (mHeader->header.size-mHeader->stringsStart)/charSize;
         } else {
             // check invariant: styles follow the strings
             if (mHeader->stylesStart <= mHeader->stringsStart) {
@@ -314,7 +324,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
                 return (mError=BAD_TYPE);
             }
             mStringPoolSize =
-                (mHeader->stylesStart-mHeader->stringsStart)/sizeof(uint16_t);
+                (mHeader->stylesStart-mHeader->stringsStart)/charSize;
         }
 
         // check invariant: stringCount > 0 requires a string pool to exist
@@ -329,13 +339,19 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
             for (i=0; i<mHeader->stringCount; i++) {
                 e[i] = dtohl(mEntries[i]);
             }
-            char16_t* s = const_cast<char16_t*>(mStrings);
-            for (i=0; i<mStringPoolSize; i++) {
-                s[i] = dtohs(mStrings[i]);
+            if (!(mHeader->flags&ResStringPool_header::UTF8_FLAG)) {
+                const char16_t* strings = (const char16_t*)mStrings;
+                char16_t* s = const_cast<char16_t*>(strings);
+                for (i=0; i<mStringPoolSize; i++) {
+                    s[i] = dtohs(strings[i]);
+                }
             }
         }
 
-        if (mStrings[mStringPoolSize-1] != 0) {
+        if ((mHeader->flags&ResStringPool_header::UTF8_FLAG &&
+                ((uint8_t*)mStrings)[mStringPoolSize-1] != 0) ||
+                (!mHeader->flags&ResStringPool_header::UTF8_FLAG &&
+                ((char16_t*)mStrings)[mStringPoolSize-1] != 0)) {
             LOGW("Bad string block: last string is not 0-terminated\n");
             return (mError=BAD_TYPE);
         }
@@ -410,24 +426,67 @@ void ResStringPool::uninit()
         free(mOwnedData);
         mOwnedData = NULL;
     }
+    if (mHeader != NULL && mCache != NULL) {
+        for (size_t x = 0; x < mHeader->stringCount; x++) {
+            if (mCache[x] != NULL) {
+                free(mCache[x]);
+                mCache[x] = NULL;
+            }
+        }
+        free(mCache);
+        mCache = NULL;
+    }
 }
 
+#define DECODE_LENGTH(str, chrsz, len) \
+    len = *(str); \
+    if (*(str)&(1<<(chrsz*8-1))) { \
+        (str)++; \
+        len = (((len)&((1<<(chrsz*8-1))-1))<<(chrsz*8)) + *(str); \
+    } \
+    (str)++;
+
 const uint16_t* ResStringPool::stringAt(size_t idx, size_t* outLen) const
 {
     if (mError == NO_ERROR && idx < mHeader->stringCount) {
-        const uint32_t off = (mEntries[idx]/sizeof(uint16_t));
+        const bool isUTF8 = (mHeader->flags&ResStringPool_header::UTF8_FLAG) != 0;
+        const uint32_t off = mEntries[idx]/(isUTF8?sizeof(char):sizeof(char16_t));
         if (off < (mStringPoolSize-1)) {
-            const char16_t* str = mStrings+off;
-            *outLen = *str;
-            if ((*str)&0x8000) {
-                str++;
-                *outLen = (((*outLen)&0x7fff)<<16) + *str;
-            }
-            if ((uint32_t)(str+1+*outLen-mStrings) < mStringPoolSize) {
-                return str+1;
+            if (!isUTF8) {
+                const char16_t* strings = (char16_t*)mStrings;
+                const char16_t* str = strings+off;
+                DECODE_LENGTH(str, sizeof(char16_t), *outLen)
+                if ((uint32_t)(str+*outLen-strings) < mStringPoolSize) {
+                    return str;
+                } else {
+                    LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
+                            (int)idx, (int)(str+*outLen-strings), (int)mStringPoolSize);
+                }
             } else {
-                LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
-                        (int)idx, (int)(str+1+*outLen-mStrings), (int)mStringPoolSize);
+                const uint8_t* strings = (uint8_t*)mStrings;
+                const uint8_t* str = strings+off;
+                DECODE_LENGTH(str, sizeof(uint8_t), *outLen)
+                size_t encLen;
+                DECODE_LENGTH(str, sizeof(uint8_t), encLen)
+                if ((uint32_t)(str+encLen-strings) < mStringPoolSize) {
+                    AutoMutex lock(mDecodeLock);
+                    if (mCache[idx] != NULL) {
+                        return mCache[idx];
+                    }
+                    char16_t *u16str = (char16_t *)calloc(*outLen+1, sizeof(char16_t));
+                    if (!u16str) {
+                        LOGW("No memory when trying to allocate decode cache for string #%d\n",
+                                (int)idx);
+                        return NULL;
+                    }
+                    const unsigned char *u8src = reinterpret_cast<const unsigned char *>(str);
+                    utf8_to_utf16(u8src, encLen, u16str, *outLen);
+                    mCache[idx] = u16str;
+                    return u16str;
+                } else {
+                    LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
+                            (int)idx, (int)(str+encLen-strings), (int)mStringPoolSize);
+                }
             }
         } else {
             LOGW("Bad string block: string #%d entry is at %d, past end at %d\n",
@@ -466,6 +525,10 @@ ssize_t ResStringPool::indexOfString(const char16_t* str, size_t strLen) const
 
     size_t len;
 
+    // TODO optimize searching for UTF-8 strings taking into account
+    // the cache fill to determine when to convert the searched-for
+    // string key to UTF-8.
+
     if (mHeader->flags&ResStringPool_header::SORTED_FLAG) {
         // Do a binary search for the string...
         ssize_t l = 0;
@@ -1043,6 +1106,7 @@ status_t ResXMLTree::getError() const
 void ResXMLTree::uninit()
 {
     mError = NO_INIT;
+    mStrings.uninit();
     if (mOwnedData) {
         free(mOwnedData);
         mOwnedData = NULL;
diff --git a/libs/utils/String16.cpp b/libs/utils/String16.cpp
index aef67f2..eab7b2b 100644
--- a/libs/utils/String16.cpp
+++ b/libs/utils/String16.cpp
@@ -172,10 +172,6 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
            : 0);
 }
 
-// ---------------------------------------------------------------------------
-
-namespace android {
-
 static inline size_t
 utf8_char_len(uint8_t ch)
 {
@@ -215,8 +211,38 @@ utf8_to_utf32(const uint8_t *src, size_t length)
     //printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result);
 }
 
+void
+utf8_to_utf16(const uint8_t *src, size_t srcLen,
+        char16_t* dst, const size_t dstLen)
+{
+    const uint8_t* const end = src + srcLen;
+    const char16_t* const dstEnd = dst + dstLen;
+    while (src < end && dst < dstEnd) {
+        size_t len = utf8_char_len(*src);
+        uint32_t codepoint = utf8_to_utf32((const uint8_t*)src, len);
+
+        // Convert the UTF32 codepoint to one or more UTF16 codepoints
+        if (codepoint <= 0xFFFF) {
+            // Single UTF16 character
+            *dst++ = (char16_t) codepoint;
+        } else {
+            // Multiple UTF16 characters with surrogates
+            codepoint = codepoint - 0x10000;
+            *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
+            *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
+        }
+
+        src += len;
+    }
+    if (dst < dstEnd) {
+        *dst = 0;
+    }
+}
+
 // ---------------------------------------------------------------------------
 
+namespace android {
+
 static SharedBuffer* gEmptyStringBuf = NULL;
 static char16_t* gEmptyString = NULL;
 
@@ -260,30 +286,14 @@ static char16_t* allocFromUTF8(const char* in, size_t len)
         p += utf8len;
     }
     
-    SharedBuffer* buf = SharedBuffer::alloc((chars+1)*sizeof(char16_t));
+    size_t bufSize = (chars+1)*sizeof(char16_t);
+    SharedBuffer* buf = SharedBuffer::alloc(bufSize);
     if (buf) {
         p = in;
         char16_t* str = (char16_t*)buf->data();
-        char16_t* d = str;
-        while (p < end) {
-            size_t len = utf8_char_len(*p);
-            uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, len);
-
-            // Convert the UTF32 codepoint to one or more UTF16 codepoints
-            if (codepoint <= 0xFFFF) {
-                // Single UTF16 character
-                *d++ = (char16_t) codepoint;
-            } else {
-                // Multiple UTF16 characters with surrogates
-                codepoint = codepoint - 0x10000;
-                *d++ = (char16_t) ((codepoint >> 10) + 0xD800);
-                *d++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
-            }
-
-            p += len;
-        }
-        *d = 0;
         
+        utf8_to_utf16((const uint8_t*)p, len, str, bufSize);
+
         //printf("Created UTF-16 string from UTF-8 \"%s\":", in);
         //printHexData(1, str, buf->size(), 16, 1);
         //printf("\n");
diff --git a/libs/utils/String8.cpp b/libs/utils/String8.cpp
index e908ec1..3a34838 100644
--- a/libs/utils/String8.cpp
+++ b/libs/utils/String8.cpp
@@ -208,10 +208,23 @@ static char* allocFromUTF16OrUTF32(const T* in, L len)
     return getEmptyString();
 }
 
-// Note: not dealing with expanding surrogate pairs.
 static char* allocFromUTF16(const char16_t* in, size_t len)
 {
-    return allocFromUTF16OrUTF32<char16_t, size_t>(in, len);
+    if (len == 0) return getEmptyString();
+
+    const size_t bytes = utf8_length_from_utf16(in, len);
+
+    SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
+    LOG_ASSERT(buf, "Unable to allocate shared buffer");
+    if (buf) {
+        char* str = (char*)buf->data();
+
+        utf16_to_utf8(in, len, str, bytes+1);
+
+        return str;
+    }
+
+    return getEmptyString();
 }
 
 static char* allocFromUTF32(const char32_t* in, size_t len)
@@ -762,6 +775,26 @@ size_t utf8_length_from_utf32(const char32_t *src, size_t src_len)
     return ret;
 }
 
+size_t utf8_length_from_utf16(const char16_t *src, size_t src_len)
+{
+    if (src == NULL || src_len == 0) {
+        return 0;
+    }
+    size_t ret = 0;
+    const char16_t* const end = src + src_len;
+    while (src < end) {
+        if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
+                && (*++src & 0xFC00) == 0xDC00) {
+            // surrogate pairs are always 4 bytes.
+            ret += 4;
+            src++;
+        } else {
+            ret += android::utf32_to_utf8_bytes((char32_t) *src++);
+        }
+    }
+    return ret;
+}
+
 static int32_t utf32_at_internal(const char* cur, size_t *num_read)
 {
     const char first_char = *cur;
@@ -848,3 +881,33 @@ size_t utf32_to_utf8(const char32_t* src, size_t src_len,
     }
     return cur - dst;
 }
+
+size_t utf16_to_utf8(const char16_t* src, size_t src_len,
+                     char* dst, size_t dst_len)
+{
+    if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
+        return 0;
+    }
+    const char16_t* cur_utf16 = src;
+    const char16_t* const end_utf16 = src + src_len;
+    char *cur = dst;
+    const char* const end = dst + dst_len;
+    while (cur_utf16 < end_utf16 && cur < end) {
+        char32_t utf32;
+        // surrogate pairs
+        if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) {
+            utf32 = (*cur_utf16++ - 0xD800) << 10;
+            utf32 |= *cur_utf16++ - 0xDC00;
+            utf32 += 0x10000;
+        } else {
+            utf32 = (char32_t) *cur_utf16++;
+        }
+        size_t len = android::utf32_to_utf8_bytes(utf32);
+        android::utf32_to_utf8((uint8_t*)cur, utf32, len);
+        cur += len;
+    }
+    if (cur < end) {
+        *cur = '\0';
+    }
+    return cur - dst;
+}