8 files changed, 98 insertions, 55 deletions
diff --git a/media/libmedia/CharacterEncodingDetector.h b/include/media/CharacterEncodingDetector.h
index 7b5ed86..deaa377 100644
--- a/media/libmedia/CharacterEncodingDetector.h
+++ b/include/media/CharacterEncodingDetector.h
@@ -43,7 +43,7 @@ class CharacterEncodingDetector {
         const UCharsetMatch *getPreferred(
                 const char *input, size_t len,
                 const UCharsetMatch** ucma, size_t matches,
-                bool *goodmatch);
+                bool *goodmatch, int *highestmatch);
 
         bool isFrequent(const uint16_t *values, uint32_t c);
 
diff --git a/media/libmedia/StringArray.h b/include/media/StringArray.h
index ae47085..ae47085 100644
--- a/media/libmedia/StringArray.h
+++ b/include/media/StringArray.h
diff --git a/include/media/mediascanner.h b/include/media/mediascanner.h
index 5213bdc..d555279 100644
--- a/include/media/mediascanner.h
+++ b/include/media/mediascanner.h
@@ -122,7 +122,6 @@ public:
 protected:
     // default encoding from MediaScanner::mLocale
     String8 mLocale;
-    CharacterEncodingDetector *mEncodingDetector;
 };
 
 }; // namespace android
diff --git a/media/libmedia/Android.mk b/media/libmedia/Android.mk
index 3be0651..ffadb23 100644
--- a/media/libmedia/Android.mk
+++ b/media/libmedia/Android.mk
@@ -76,9 +76,10 @@ LOCAL_MODULE:= libmedia
 
 LOCAL_C_INCLUDES := \
     $(TOP)/frameworks/native/include/media/openmax \
+    $(TOP)/frameworks/av/include/media/ \
     $(TOP)/frameworks/av/media/libstagefright \
-    external/icu/icu4c/source/common \
-    external/icu/icu4c/source/i18n \
+    $(TOP)/external/icu/icu4c/source/common \
+    $(TOP)/external/icu/icu4c/source/i18n \
     $(call include-path-for, audio-effects) \
     $(call include-path-for, audio-utils)
 
diff --git a/media/libmedia/CharacterEncodingDetector.cpp b/media/libmedia/CharacterEncodingDetector.cpp
index 7d1ddfd..41994dc 100644
--- a/media/libmedia/CharacterEncodingDetector.cpp
+++ b/media/libmedia/CharacterEncodingDetector.cpp
@@ -18,7 +18,7 @@
 #define LOG_TAG "CharacterEncodingDector"
 #include <utils/Log.h>
 
-#include "CharacterEncodingDetector.h"
+#include <CharacterEncodingDetector.h>
 #include "CharacterEncodingDetectorTables.h"
 
 #include "utils/Vector.h"
@@ -118,10 +118,12 @@ void CharacterEncodingDetector::detectAndConvert() {
             int32_t matches;
             const UCharsetMatch** ucma = ucsdet_detectAll(csd, &matches, &status);
             bool goodmatch = true;
+            int highest = 0;
             const UCharsetMatch* bestCombinedMatch = getPreferred(buf, strlen(buf),
-                    ucma, matches, &goodmatch);
+                    ucma, matches, &goodmatch, &highest);
 
-            if (!goodmatch && strlen(buf) < 20) {
+            ALOGV("goodmatch: %s, highest: %d", goodmatch ? "true" : "false", highest);
+            if (!goodmatch && (highest < 15 || strlen(buf) < 20)) {
                 ALOGV("not a good match, trying with more data");
                 // This string might be too short for ICU to do anything useful with.
                 // (real world example: "Björk" in ISO-8859-1 might be detected as GB18030, because
@@ -146,9 +148,10 @@ void CharacterEncodingDetector::detectAndConvert() {
                     ucsdet_setText(csd, buf, strlen(buf), &status);
                     ucma = ucsdet_detectAll(csd, &matches, &status);
                     bestCombinedMatch = getPreferred(buf, strlen(buf),
-                            ucma, matches, &goodmatch);
-                    if (!goodmatch) {
+                            ucma, matches, &goodmatch, &highest);
+                    if (!goodmatch && highest <= 15) {
                         ALOGV("still not a good match after adding printable tags");
+                        bestCombinedMatch = NULL;
                     }
                 } else {
                     ALOGV("no printable tags to add");
@@ -157,6 +160,8 @@ void CharacterEncodingDetector::detectAndConvert() {
 
             if (bestCombinedMatch != NULL) {
                 combinedenc = ucsdet_getName(bestCombinedMatch, &status);
+            } else {
+                combinedenc = "ISO-8859-1";
             }
         }
 
@@ -199,10 +204,17 @@ void CharacterEncodingDetector::detectAndConvert() {
             if (strcmp(enc,"UTF-8") != 0) {
                 // only convert if the source encoding isn't already UTF-8
                 ALOGV("@@@ using converter %s for %s", enc, mNames.getEntry(i));
+                status = U_ZERO_ERROR;
                 UConverter *conv = ucnv_open(enc, &status);
                 if (U_FAILURE(status)) {
-                    ALOGE("could not create UConverter for %s", enc);
-                    continue;
+                    ALOGW("could not create UConverter for %s (%d), falling back to ISO-8859-1",
+                            enc, status);
+                    status = U_ZERO_ERROR;
+                    conv = ucnv_open("ISO-8859-1", &status);
+                    if (U_FAILURE(status)) {
+                        ALOGW("could not create UConverter for ISO-8859-1 either");
+                        continue;
+                    }
                 }
 
                 // convert from native encoding to UTF-8
@@ -224,7 +236,16 @@ void CharacterEncodingDetector::detectAndConvert() {
                 } else {
                     // zero terminate
                     *target = 0;
-                    mValues.setEntry(i, buffer);
+                    // strip trailing spaces
+                    while (--target > buffer && *target == ' ') {
+                        *target = 0;
+                    }
+                    // skip leading spaces
+                    char *start = buffer;
+                    while (*start == ' ') {
+                        start++;
+                    }
+                    mValues.setEntry(i, start);
                 }
 
                 delete[] buffer;
@@ -261,7 +282,7 @@ void CharacterEncodingDetector::detectAndConvert() {
 const UCharsetMatch *CharacterEncodingDetector::getPreferred(
         const char *input, size_t len,
         const UCharsetMatch** ucma, size_t nummatches,
-        bool *goodmatch) {
+        bool *goodmatch, int *highestmatch) {
 
     *goodmatch = false;
     Vector<const UCharsetMatch*> matches;
@@ -316,11 +337,17 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
         }
 
         ALOGV("%zu: %s %d", i, encname, confidence);
+        status = U_ZERO_ERROR;
         UConverter *conv = ucnv_open(encname, &status);
+        int demerit = 0;
+        if (U_FAILURE(status)) {
+            ALOGV("failed to open %s: %d", encname, status);
+            confidence = 0;
+            demerit += 1000;
+        }
         const char *source = input;
         const char *sourceLimit = input + len;
         status = U_ZERO_ERROR;
-        int demerit = 0;
         int frequentchars = 0;
         int totalchars = 0;
         while (true) {
@@ -337,7 +364,8 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
             if (c < 0x20 || (c >= 0x7f && c <= 0x009f)) {
                 ALOGV("control character %x", c);
                 demerit += 100;
-            } else if ((c >= 0xa0 && c <= 0xbe)         // symbols, superscripts
+            } else if ((c == 0xa0)                      // no-break space
+                    || (c >= 0xa2 && c <= 0xbe)         // symbols, superscripts
                     || (c == 0xd7) || (c == 0xf7)       // multiplication and division signs
                     || (c >= 0x2000 && c <= 0x209f)) {  // punctuation, superscripts
                 ALOGV("unlikely character %x", c);
@@ -408,10 +436,14 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
     } else {
         ALOGV("runner up: '%s' w/ %d confidence",
                 ucsdet_getName(matches[runnerupidx], &status), runnerup);
+        if (runnerup < 0) {
+            runnerup = 0;
+        }
         if ((highest - runnerup) > 15) {
             *goodmatch = true;
         }
     }
+    *highestmatch = highest;
     return matches[highestidx];
 }
 
diff --git a/media/libmedia/MediaScannerClient.cpp b/media/libmedia/MediaScannerClient.cpp
index 1661f04..9f803cb 100644
--- a/media/libmedia/MediaScannerClient.cpp
+++ b/media/libmedia/MediaScannerClient.cpp
@@ -25,14 +25,10 @@
 
 namespace android {
 
-MediaScannerClient::MediaScannerClient()
-    :   mEncodingDetector(NULL)
-{
+MediaScannerClient::MediaScannerClient() {
 }
 
-MediaScannerClient::~MediaScannerClient()
-{
-    delete mEncodingDetector;
+MediaScannerClient::~MediaScannerClient() {
 }
 
 void MediaScannerClient::setLocale(const char* locale)
@@ -40,31 +36,16 @@ void MediaScannerClient::setLocale(const char* locale)
     mLocale = locale; // not currently used
 }
 
-void MediaScannerClient::beginFile()
-{
-    delete mEncodingDetector;
-    mEncodingDetector = new CharacterEncodingDetector();
+void MediaScannerClient::beginFile() {
 }
 
 status_t MediaScannerClient::addStringTag(const char* name, const char* value)
 {
-    mEncodingDetector->addTag(name, value);
+    handleStringTag(name, value);
     return OK;
 }
 
-void MediaScannerClient::endFile()
-{
-    mEncodingDetector->detectAndConvert();
-
-    int size = mEncodingDetector->size();
-    if (size) {
-        for (int i = 0; i < size; i++) {
-            const char *name;
-            const char *value;
-            mEncodingDetector->getTag(i, &name, &value);
-            handleStringTag(name, value);
-        }
-    }
+void MediaScannerClient::endFile() {
 }
 
 }  // namespace android
diff --git a/media/libstagefright/Android.mk b/media/libstagefright/Android.mk
index be9af5e..193f8a7 100644
--- a/media/libstagefright/Android.mk
+++ b/media/libstagefright/Android.mk
@@ -62,6 +62,7 @@ LOCAL_SRC_FILES:=                         \
         avc_utils.cpp                     \
 
 LOCAL_C_INCLUDES:= \
+        $(TOP)/frameworks/av/include/media/ \
         $(TOP)/frameworks/av/include/media/stagefright/timedtext \
         $(TOP)/frameworks/native/include/media/hardware \
         $(TOP)/frameworks/native/include/media/openmax \
@@ -70,6 +71,8 @@ LOCAL_C_INCLUDES:= \
         $(TOP)/external/openssl/include \
         $(TOP)/external/libvpx/libwebm \
         $(TOP)/system/netd/include \
+        $(TOP)/external/icu/icu4c/source/common \
+        $(TOP)/external/icu/icu4c/source/i18n \
 
 LOCAL_SHARED_LIBRARIES := \
         libbinder \
diff --git a/media/libstagefright/StagefrightMetadataRetriever.cpp b/media/libstagefright/StagefrightMetadataRetriever.cpp
index 8cc41e7..101fc8a 100644
--- a/media/libstagefright/StagefrightMetadataRetriever.cpp
+++ b/media/libstagefright/StagefrightMetadataRetriever.cpp
@@ -32,6 +32,7 @@
 #include <media/stagefright/MetaData.h>
 #include <media/stagefright/OMXCodec.h>
 #include <media/stagefright/MediaDefs.h>
+#include <CharacterEncodingDetector.h>
 
 namespace android {
 
@@ -450,32 +451,58 @@ void StagefrightMetadataRetriever::parseMetaData() {
     struct Map {
         int from;
         int to;
+        const char *name;
     };
     static const Map kMap[] = {
-        { kKeyMIMEType, METADATA_KEY_MIMETYPE },
-        { kKeyCDTrackNumber, METADATA_KEY_CD_TRACK_NUMBER },
-        { kKeyDiscNumber, METADATA_KEY_DISC_NUMBER },
-        { kKeyAlbum, METADATA_KEY_ALBUM },
-        { kKeyArtist, METADATA_KEY_ARTIST },
-        { kKeyAlbumArtist, METADATA_KEY_ALBUMARTIST },
-        { kKeyAuthor, METADATA_KEY_AUTHOR },
-        { kKeyComposer, METADATA_KEY_COMPOSER },
-        { kKeyDate, METADATA_KEY_DATE },
-        { kKeyGenre, METADATA_KEY_GENRE },
-        { kKeyTitle, METADATA_KEY_TITLE },
-        { kKeyYear, METADATA_KEY_YEAR },
-        { kKeyWriter, METADATA_KEY_WRITER },
-        { kKeyCompilation, METADATA_KEY_COMPILATION },
-        { kKeyLocation, METADATA_KEY_LOCATION },
+        { kKeyMIMEType, METADATA_KEY_MIMETYPE, NULL },
+        { kKeyCDTrackNumber, METADATA_KEY_CD_TRACK_NUMBER, "tracknumber" },
+        { kKeyDiscNumber, METADATA_KEY_DISC_NUMBER, "discnumber" },
+        { kKeyAlbum, METADATA_KEY_ALBUM, "album" },
+        { kKeyArtist, METADATA_KEY_ARTIST, "artist" },
+        { kKeyAlbumArtist, METADATA_KEY_ALBUMARTIST, "albumartist" },
+        { kKeyAuthor, METADATA_KEY_AUTHOR, NULL },
+        { kKeyComposer, METADATA_KEY_COMPOSER, "composer" },
+        { kKeyDate, METADATA_KEY_DATE, NULL },
+        { kKeyGenre, METADATA_KEY_GENRE, "genre" },
+        { kKeyTitle, METADATA_KEY_TITLE, "title" },
+        { kKeyYear, METADATA_KEY_YEAR, "year" },
+        { kKeyWriter, METADATA_KEY_WRITER, "writer" },
+        { kKeyCompilation, METADATA_KEY_COMPILATION, "compilation" },
+        { kKeyLocation, METADATA_KEY_LOCATION, NULL },
     };
+
     static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
 
+    CharacterEncodingDetector *detector = new CharacterEncodingDetector();
+
     for (size_t i = 0; i < kNumMapEntries; ++i) {
         const char *value;
         if (meta->findCString(kMap[i].from, &value)) {
-            mMetaData.add(kMap[i].to, String8(value));
+            if (kMap[i].name) {
+                // add to charset detector
+                detector->addTag(kMap[i].name, value);
+            } else {
+                // directly add to output list
+                mMetaData.add(kMap[i].to, String8(value));
+            }
+        }
+    }
+
+    detector->detectAndConvert();
+    int size = detector->size();
+    if (size) {
+        for (int i = 0; i < size; i++) {
+            const char *name;
+            const char *value;
+            detector->getTag(i, &name, &value);
+            for (size_t j = 0; j < kNumMapEntries; ++j) {
+                if (kMap[j].name && !strcmp(kMap[j].name, name)) {
+                    mMetaData.add(kMap[j].to, String8(value));
+                }
+            }
         }
     }
+    delete detector;
 
     const void *data;
     uint32_t type;