summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/media/CharacterEncodingDetector.h (renamed from media/libmedia/CharacterEncodingDetector.h)2
-rw-r--r--include/media/StringArray.h (renamed from media/libmedia/StringArray.h)0
-rw-r--r--include/media/mediascanner.h1
-rw-r--r--media/libmedia/Android.mk5
-rw-r--r--media/libmedia/CharacterEncodingDetector.cpp54
-rw-r--r--media/libmedia/MediaScannerClient.cpp29
-rw-r--r--media/libstagefright/Android.mk3
-rw-r--r--media/libstagefright/StagefrightMetadataRetriever.cpp59
8 files changed, 98 insertions, 55 deletions
diff --git a/media/libmedia/CharacterEncodingDetector.h b/include/media/CharacterEncodingDetector.h
index 7b5ed86..deaa377 100644
--- a/media/libmedia/CharacterEncodingDetector.h
+++ b/include/media/CharacterEncodingDetector.h
@@ -43,7 +43,7 @@ class CharacterEncodingDetector {
const UCharsetMatch *getPreferred(
const char *input, size_t len,
const UCharsetMatch** ucma, size_t matches,
- bool *goodmatch);
+ bool *goodmatch, int *highestmatch);
bool isFrequent(const uint16_t *values, uint32_t c);
diff --git a/media/libmedia/StringArray.h b/include/media/StringArray.h
index ae47085..ae47085 100644
--- a/media/libmedia/StringArray.h
+++ b/include/media/StringArray.h
diff --git a/include/media/mediascanner.h b/include/media/mediascanner.h
index 5213bdc..d555279 100644
--- a/include/media/mediascanner.h
+++ b/include/media/mediascanner.h
@@ -122,7 +122,6 @@ public:
protected:
// default encoding from MediaScanner::mLocale
String8 mLocale;
- CharacterEncodingDetector *mEncodingDetector;
};
}; // namespace android
diff --git a/media/libmedia/Android.mk b/media/libmedia/Android.mk
index 3be0651..ffadb23 100644
--- a/media/libmedia/Android.mk
+++ b/media/libmedia/Android.mk
@@ -76,9 +76,10 @@ LOCAL_MODULE:= libmedia
LOCAL_C_INCLUDES := \
$(TOP)/frameworks/native/include/media/openmax \
+ $(TOP)/frameworks/av/include/media/ \
$(TOP)/frameworks/av/media/libstagefright \
- external/icu/icu4c/source/common \
- external/icu/icu4c/source/i18n \
+ $(TOP)/external/icu/icu4c/source/common \
+ $(TOP)/external/icu/icu4c/source/i18n \
$(call include-path-for, audio-effects) \
$(call include-path-for, audio-utils)
diff --git a/media/libmedia/CharacterEncodingDetector.cpp b/media/libmedia/CharacterEncodingDetector.cpp
index 7d1ddfd..41994dc 100644
--- a/media/libmedia/CharacterEncodingDetector.cpp
+++ b/media/libmedia/CharacterEncodingDetector.cpp
@@ -18,7 +18,7 @@
#define LOG_TAG "CharacterEncodingDector"
#include <utils/Log.h>
-#include "CharacterEncodingDetector.h"
+#include <CharacterEncodingDetector.h>
#include "CharacterEncodingDetectorTables.h"
#include "utils/Vector.h"
@@ -118,10 +118,12 @@ void CharacterEncodingDetector::detectAndConvert() {
int32_t matches;
const UCharsetMatch** ucma = ucsdet_detectAll(csd, &matches, &status);
bool goodmatch = true;
+ int highest = 0;
const UCharsetMatch* bestCombinedMatch = getPreferred(buf, strlen(buf),
- ucma, matches, &goodmatch);
+ ucma, matches, &goodmatch, &highest);
- if (!goodmatch && strlen(buf) < 20) {
+ ALOGV("goodmatch: %s, highest: %d", goodmatch ? "true" : "false", highest);
+ if (!goodmatch && (highest < 15 || strlen(buf) < 20)) {
ALOGV("not a good match, trying with more data");
// This string might be too short for ICU to do anything useful with.
// (real world example: "Björk" in ISO-8859-1 might be detected as GB18030, because
@@ -146,9 +148,10 @@ void CharacterEncodingDetector::detectAndConvert() {
ucsdet_setText(csd, buf, strlen(buf), &status);
ucma = ucsdet_detectAll(csd, &matches, &status);
bestCombinedMatch = getPreferred(buf, strlen(buf),
- ucma, matches, &goodmatch);
- if (!goodmatch) {
+ ucma, matches, &goodmatch, &highest);
+ if (!goodmatch && highest <= 15) {
ALOGV("still not a good match after adding printable tags");
+ bestCombinedMatch = NULL;
}
} else {
ALOGV("no printable tags to add");
@@ -157,6 +160,8 @@ void CharacterEncodingDetector::detectAndConvert() {
if (bestCombinedMatch != NULL) {
combinedenc = ucsdet_getName(bestCombinedMatch, &status);
+ } else {
+ combinedenc = "ISO-8859-1";
}
}
@@ -199,10 +204,17 @@ void CharacterEncodingDetector::detectAndConvert() {
if (strcmp(enc,"UTF-8") != 0) {
// only convert if the source encoding isn't already UTF-8
ALOGV("@@@ using converter %s for %s", enc, mNames.getEntry(i));
+ status = U_ZERO_ERROR;
UConverter *conv = ucnv_open(enc, &status);
if (U_FAILURE(status)) {
- ALOGE("could not create UConverter for %s", enc);
- continue;
+ ALOGW("could not create UConverter for %s (%d), falling back to ISO-8859-1",
+ enc, status);
+ status = U_ZERO_ERROR;
+ conv = ucnv_open("ISO-8859-1", &status);
+ if (U_FAILURE(status)) {
+ ALOGW("could not create UConverter for ISO-8859-1 either");
+ continue;
+ }
}
// convert from native encoding to UTF-8
@@ -224,7 +236,16 @@ void CharacterEncodingDetector::detectAndConvert() {
} else {
// zero terminate
*target = 0;
- mValues.setEntry(i, buffer);
+ // strip trailing spaces
+ while (--target > buffer && *target == ' ') {
+ *target = 0;
+ }
+ // skip leading spaces
+ char *start = buffer;
+ while (*start == ' ') {
+ start++;
+ }
+ mValues.setEntry(i, start);
}
delete[] buffer;
@@ -261,7 +282,7 @@ void CharacterEncodingDetector::detectAndConvert() {
const UCharsetMatch *CharacterEncodingDetector::getPreferred(
const char *input, size_t len,
const UCharsetMatch** ucma, size_t nummatches,
- bool *goodmatch) {
+ bool *goodmatch, int *highestmatch) {
*goodmatch = false;
Vector<const UCharsetMatch*> matches;
@@ -316,11 +337,17 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
}
ALOGV("%zu: %s %d", i, encname, confidence);
+ status = U_ZERO_ERROR;
UConverter *conv = ucnv_open(encname, &status);
+ int demerit = 0;
+ if (U_FAILURE(status)) {
+ ALOGV("failed to open %s: %d", encname, status);
+ confidence = 0;
+ demerit += 1000;
+ }
const char *source = input;
const char *sourceLimit = input + len;
status = U_ZERO_ERROR;
- int demerit = 0;
int frequentchars = 0;
int totalchars = 0;
while (true) {
@@ -337,7 +364,8 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
if (c < 0x20 || (c >= 0x7f && c <= 0x009f)) {
ALOGV("control character %x", c);
demerit += 100;
- } else if ((c >= 0xa0 && c <= 0xbe) // symbols, superscripts
+ } else if ((c == 0xa0) // no-break space
+ || (c >= 0xa2 && c <= 0xbe) // symbols, superscripts
|| (c == 0xd7) || (c == 0xf7) // multiplication and division signs
|| (c >= 0x2000 && c <= 0x209f)) { // punctuation, superscripts
ALOGV("unlikely character %x", c);
@@ -408,10 +436,14 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
} else {
ALOGV("runner up: '%s' w/ %d confidence",
ucsdet_getName(matches[runnerupidx], &status), runnerup);
+ if (runnerup < 0) {
+ runnerup = 0;
+ }
if ((highest - runnerup) > 15) {
*goodmatch = true;
}
}
+ *highestmatch = highest;
return matches[highestidx];
}
diff --git a/media/libmedia/MediaScannerClient.cpp b/media/libmedia/MediaScannerClient.cpp
index 1661f04..9f803cb 100644
--- a/media/libmedia/MediaScannerClient.cpp
+++ b/media/libmedia/MediaScannerClient.cpp
@@ -25,14 +25,10 @@
namespace android {
-MediaScannerClient::MediaScannerClient()
- : mEncodingDetector(NULL)
-{
+MediaScannerClient::MediaScannerClient() {
}
-MediaScannerClient::~MediaScannerClient()
-{
- delete mEncodingDetector;
+MediaScannerClient::~MediaScannerClient() {
}
void MediaScannerClient::setLocale(const char* locale)
@@ -40,31 +36,16 @@ void MediaScannerClient::setLocale(const char* locale)
mLocale = locale; // not currently used
}
-void MediaScannerClient::beginFile()
-{
- delete mEncodingDetector;
- mEncodingDetector = new CharacterEncodingDetector();
+void MediaScannerClient::beginFile() {
}
status_t MediaScannerClient::addStringTag(const char* name, const char* value)
{
- mEncodingDetector->addTag(name, value);
+ handleStringTag(name, value);
return OK;
}
-void MediaScannerClient::endFile()
-{
- mEncodingDetector->detectAndConvert();
-
- int size = mEncodingDetector->size();
- if (size) {
- for (int i = 0; i < size; i++) {
- const char *name;
- const char *value;
- mEncodingDetector->getTag(i, &name, &value);
- handleStringTag(name, value);
- }
- }
+void MediaScannerClient::endFile() {
}
} // namespace android
diff --git a/media/libstagefright/Android.mk b/media/libstagefright/Android.mk
index be9af5e..193f8a7 100644
--- a/media/libstagefright/Android.mk
+++ b/media/libstagefright/Android.mk
@@ -62,6 +62,7 @@ LOCAL_SRC_FILES:= \
avc_utils.cpp \
LOCAL_C_INCLUDES:= \
+ $(TOP)/frameworks/av/include/media/ \
$(TOP)/frameworks/av/include/media/stagefright/timedtext \
$(TOP)/frameworks/native/include/media/hardware \
$(TOP)/frameworks/native/include/media/openmax \
@@ -70,6 +71,8 @@ LOCAL_C_INCLUDES:= \
$(TOP)/external/openssl/include \
$(TOP)/external/libvpx/libwebm \
$(TOP)/system/netd/include \
+ $(TOP)/external/icu/icu4c/source/common \
+ $(TOP)/external/icu/icu4c/source/i18n \
LOCAL_SHARED_LIBRARIES := \
libbinder \
diff --git a/media/libstagefright/StagefrightMetadataRetriever.cpp b/media/libstagefright/StagefrightMetadataRetriever.cpp
index 8cc41e7..101fc8a 100644
--- a/media/libstagefright/StagefrightMetadataRetriever.cpp
+++ b/media/libstagefright/StagefrightMetadataRetriever.cpp
@@ -32,6 +32,7 @@
#include <media/stagefright/MetaData.h>
#include <media/stagefright/OMXCodec.h>
#include <media/stagefright/MediaDefs.h>
+#include <CharacterEncodingDetector.h>
namespace android {
@@ -450,32 +451,58 @@ void StagefrightMetadataRetriever::parseMetaData() {
struct Map {
int from;
int to;
+ const char *name;
};
static const Map kMap[] = {
- { kKeyMIMEType, METADATA_KEY_MIMETYPE },
- { kKeyCDTrackNumber, METADATA_KEY_CD_TRACK_NUMBER },
- { kKeyDiscNumber, METADATA_KEY_DISC_NUMBER },
- { kKeyAlbum, METADATA_KEY_ALBUM },
- { kKeyArtist, METADATA_KEY_ARTIST },
- { kKeyAlbumArtist, METADATA_KEY_ALBUMARTIST },
- { kKeyAuthor, METADATA_KEY_AUTHOR },
- { kKeyComposer, METADATA_KEY_COMPOSER },
- { kKeyDate, METADATA_KEY_DATE },
- { kKeyGenre, METADATA_KEY_GENRE },
- { kKeyTitle, METADATA_KEY_TITLE },
- { kKeyYear, METADATA_KEY_YEAR },
- { kKeyWriter, METADATA_KEY_WRITER },
- { kKeyCompilation, METADATA_KEY_COMPILATION },
- { kKeyLocation, METADATA_KEY_LOCATION },
+ { kKeyMIMEType, METADATA_KEY_MIMETYPE, NULL },
+ { kKeyCDTrackNumber, METADATA_KEY_CD_TRACK_NUMBER, "tracknumber" },
+ { kKeyDiscNumber, METADATA_KEY_DISC_NUMBER, "discnumber" },
+ { kKeyAlbum, METADATA_KEY_ALBUM, "album" },
+ { kKeyArtist, METADATA_KEY_ARTIST, "artist" },
+ { kKeyAlbumArtist, METADATA_KEY_ALBUMARTIST, "albumartist" },
+ { kKeyAuthor, METADATA_KEY_AUTHOR, NULL },
+ { kKeyComposer, METADATA_KEY_COMPOSER, "composer" },
+ { kKeyDate, METADATA_KEY_DATE, NULL },
+ { kKeyGenre, METADATA_KEY_GENRE, "genre" },
+ { kKeyTitle, METADATA_KEY_TITLE, "title" },
+ { kKeyYear, METADATA_KEY_YEAR, "year" },
+ { kKeyWriter, METADATA_KEY_WRITER, "writer" },
+ { kKeyCompilation, METADATA_KEY_COMPILATION, "compilation" },
+ { kKeyLocation, METADATA_KEY_LOCATION, NULL },
};
+
static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
+ CharacterEncodingDetector *detector = new CharacterEncodingDetector();
+
for (size_t i = 0; i < kNumMapEntries; ++i) {
const char *value;
if (meta->findCString(kMap[i].from, &value)) {
- mMetaData.add(kMap[i].to, String8(value));
+ if (kMap[i].name) {
+ // add to charset detector
+ detector->addTag(kMap[i].name, value);
+ } else {
+ // directly add to output list
+ mMetaData.add(kMap[i].to, String8(value));
+ }
+ }
+ }
+
+ detector->detectAndConvert();
+ int size = detector->size();
+ if (size) {
+ for (int i = 0; i < size; i++) {
+ const char *name;
+ const char *value;
+ detector->getTag(i, &name, &value);
+ for (size_t j = 0; j < kNumMapEntries; ++j) {
+ if (kMap[j].name && !strcmp(kMap[j].name, name)) {
+ mMetaData.add(kMap[j].to, String8(value));
+ }
+ }
}
}
+ delete detector;
const void *data;
uint32_t type;