diff options
author | Marco Nelissen <marcone@google.com> | 2014-09-03 19:09:43 +0000 |
---|---|---|
committer | Android (Google) Code Review <android-gerrit@google.com> | 2014-09-03 19:09:43 +0000 |
commit | d71233a846aca7035a851941c1530d04c6a65086 (patch) | |
tree | a1f528af0c52fd0d78a1843743f2930a2684f2b2 /media | |
parent | acdae5d7865b604acaadd3be1c45c84ca4bf3952 (diff) | |
parent | 34581f44cde67960fbac3ba1f191a2c063ea5145 (diff) | |
download | frameworks_av-d71233a846aca7035a851941c1530d04c6a65086.zip frameworks_av-d71233a846aca7035a851941c1530d04c6a65086.tar.gz frameworks_av-d71233a846aca7035a851941c1530d04c6a65086.tar.bz2 |
Merge "Use CharacterEncodingDetector in metadataretriever" into lmp-dev
Diffstat (limited to 'media')
-rw-r--r-- | media/libmedia/Android.mk | 5 | ||||
-rw-r--r-- | media/libmedia/CharacterEncodingDetector.cpp | 54 | ||||
-rw-r--r-- | media/libmedia/CharacterEncodingDetector.h | 63 | ||||
-rw-r--r-- | media/libmedia/MediaScannerClient.cpp | 29 | ||||
-rw-r--r-- | media/libmedia/StringArray.h | 83 | ||||
-rw-r--r-- | media/libstagefright/Android.mk | 3 | ||||
-rw-r--r-- | media/libstagefright/StagefrightMetadataRetriever.cpp | 59 |
7 files changed, 97 insertions, 199 deletions
diff --git a/media/libmedia/Android.mk b/media/libmedia/Android.mk index 37bc418..e012116 100644 --- a/media/libmedia/Android.mk +++ b/media/libmedia/Android.mk @@ -76,9 +76,10 @@ LOCAL_MODULE:= libmedia LOCAL_C_INCLUDES := \ $(TOP)/frameworks/native/include/media/openmax \ + $(TOP)/frameworks/av/include/media/ \ $(TOP)/frameworks/av/media/libstagefright \ - external/icu/icu4c/source/common \ - external/icu/icu4c/source/i18n \ + $(TOP)/external/icu/icu4c/source/common \ + $(TOP)/external/icu/icu4c/source/i18n \ $(call include-path-for, audio-effects) \ $(call include-path-for, audio-utils) diff --git a/media/libmedia/CharacterEncodingDetector.cpp b/media/libmedia/CharacterEncodingDetector.cpp index 7d1ddfd..41994dc 100644 --- a/media/libmedia/CharacterEncodingDetector.cpp +++ b/media/libmedia/CharacterEncodingDetector.cpp @@ -18,7 +18,7 @@ #define LOG_TAG "CharacterEncodingDector" #include <utils/Log.h> -#include "CharacterEncodingDetector.h" +#include <CharacterEncodingDetector.h> #include "CharacterEncodingDetectorTables.h" #include "utils/Vector.h" @@ -118,10 +118,12 @@ void CharacterEncodingDetector::detectAndConvert() { int32_t matches; const UCharsetMatch** ucma = ucsdet_detectAll(csd, &matches, &status); bool goodmatch = true; + int highest = 0; const UCharsetMatch* bestCombinedMatch = getPreferred(buf, strlen(buf), - ucma, matches, &goodmatch); + ucma, matches, &goodmatch, &highest); - if (!goodmatch && strlen(buf) < 20) { + ALOGV("goodmatch: %s, highest: %d", goodmatch ? "true" : "false", highest); + if (!goodmatch && (highest < 15 || strlen(buf) < 20)) { ALOGV("not a good match, trying with more data"); // This string might be too short for ICU to do anything useful with. // (real world example: "Björk" in ISO-8859-1 might be detected as GB18030, because @@ -146,9 +148,10 @@ void CharacterEncodingDetector::detectAndConvert() { ucsdet_setText(csd, buf, strlen(buf), &status); ucma = ucsdet_detectAll(csd, &matches, &status); bestCombinedMatch = getPreferred(buf, strlen(buf), - ucma, matches, &goodmatch); - if (!goodmatch) { + ucma, matches, &goodmatch, &highest); + if (!goodmatch && highest <= 15) { ALOGV("still not a good match after adding printable tags"); + bestCombinedMatch = NULL; } } else { ALOGV("no printable tags to add"); @@ -157,6 +160,8 @@ void CharacterEncodingDetector::detectAndConvert() { if (bestCombinedMatch != NULL) { combinedenc = ucsdet_getName(bestCombinedMatch, &status); + } else { + combinedenc = "ISO-8859-1"; } } @@ -199,10 +204,17 @@ void CharacterEncodingDetector::detectAndConvert() { if (strcmp(enc,"UTF-8") != 0) { // only convert if the source encoding isn't already UTF-8 ALOGV("@@@ using converter %s for %s", enc, mNames.getEntry(i)); + status = U_ZERO_ERROR; UConverter *conv = ucnv_open(enc, &status); if (U_FAILURE(status)) { - ALOGE("could not create UConverter for %s", enc); - continue; + ALOGW("could not create UConverter for %s (%d), falling back to ISO-8859-1", + enc, status); + status = U_ZERO_ERROR; + conv = ucnv_open("ISO-8859-1", &status); + if (U_FAILURE(status)) { + ALOGW("could not create UConverter for ISO-8859-1 either"); + continue; + } } // convert from native encoding to UTF-8 @@ -224,7 +236,16 @@ void CharacterEncodingDetector::detectAndConvert() { } else { // zero terminate *target = 0; - mValues.setEntry(i, buffer); + // strip trailing spaces + while (--target > buffer && *target == ' ') { + *target = 0; + } + // skip leading spaces + char *start = buffer; + while (*start == ' ') { + start++; + } + mValues.setEntry(i, start); } delete[] buffer; @@ -261,7 +282,7 @@ void CharacterEncodingDetector::detectAndConvert() { const UCharsetMatch *CharacterEncodingDetector::getPreferred( const char *input, size_t len, const UCharsetMatch** ucma, size_t nummatches, - bool *goodmatch) { + bool *goodmatch, int *highestmatch) { *goodmatch = false; Vector<const UCharsetMatch*> matches; @@ -316,11 +337,17 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred( } ALOGV("%zu: %s %d", i, encname, confidence); + status = U_ZERO_ERROR; UConverter *conv = ucnv_open(encname, &status); + int demerit = 0; + if (U_FAILURE(status)) { + ALOGV("failed to open %s: %d", encname, status); + confidence = 0; + demerit += 1000; + } const char *source = input; const char *sourceLimit = input + len; status = U_ZERO_ERROR; - int demerit = 0; int frequentchars = 0; int totalchars = 0; while (true) { @@ -337,7 +364,8 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred( if (c < 0x20 || (c >= 0x7f && c <= 0x009f)) { ALOGV("control character %x", c); demerit += 100; - } else if ((c >= 0xa0 && c <= 0xbe) // symbols, superscripts + } else if ((c == 0xa0) // no-break space + || (c >= 0xa2 && c <= 0xbe) // symbols, superscripts || (c == 0xd7) || (c == 0xf7) // multiplication and division signs || (c >= 0x2000 && c <= 0x209f)) { // punctuation, superscripts ALOGV("unlikely character %x", c); @@ -408,10 +436,14 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred( } else { ALOGV("runner up: '%s' w/ %d confidence", ucsdet_getName(matches[runnerupidx], &status), runnerup); + if (runnerup < 0) { + runnerup = 0; + } if ((highest - runnerup) > 15) { *goodmatch = true; } } + *highestmatch = highest; return matches[highestidx]; } diff --git a/media/libmedia/CharacterEncodingDetector.h b/media/libmedia/CharacterEncodingDetector.h deleted file mode 100644 index 7b5ed86..0000000 --- a/media/libmedia/CharacterEncodingDetector.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _CHARACTER_ENCODING_DETECTOR_H -#define _CHARACTER_ENCODING_DETECTOR_H - -#include <media/mediascanner.h> - -#include "StringArray.h" - -#include "unicode/ucnv.h" -#include "unicode/ucsdet.h" -#include "unicode/ustring.h" - -namespace android { - -class CharacterEncodingDetector { - - public: - CharacterEncodingDetector(); - ~CharacterEncodingDetector(); - - void addTag(const char *name, const char *value); - size_t size(); - - void detectAndConvert(); - status_t getTag(int index, const char **name, const char**value); - - private: - const UCharsetMatch *getPreferred( - const char *input, size_t len, - const UCharsetMatch** ucma, size_t matches, - bool *goodmatch); - - bool isFrequent(const uint16_t *values, uint32_t c); - - // cached name and value strings, for native encoding support. - // TODO: replace these with byte blob arrays that don't require the data to be - // singlenullbyte-terminated - StringArray mNames; - StringArray mValues; - - UConverter* mUtf8Conv; -}; - - - -}; // namespace android - -#endif diff --git a/media/libmedia/MediaScannerClient.cpp b/media/libmedia/MediaScannerClient.cpp index 1661f04..9f803cb 100644 --- a/media/libmedia/MediaScannerClient.cpp +++ b/media/libmedia/MediaScannerClient.cpp @@ -25,14 +25,10 @@ namespace android { -MediaScannerClient::MediaScannerClient() - : mEncodingDetector(NULL) -{ +MediaScannerClient::MediaScannerClient() { } -MediaScannerClient::~MediaScannerClient() -{ - delete mEncodingDetector; +MediaScannerClient::~MediaScannerClient() { } void MediaScannerClient::setLocale(const char* locale) @@ -40,31 +36,16 @@ void MediaScannerClient::setLocale(const char* locale) mLocale = locale; // not currently used } -void MediaScannerClient::beginFile() -{ - delete mEncodingDetector; - mEncodingDetector = new CharacterEncodingDetector(); +void MediaScannerClient::beginFile() { } status_t MediaScannerClient::addStringTag(const char* name, const char* value) { - mEncodingDetector->addTag(name, value); + handleStringTag(name, value); return OK; } -void MediaScannerClient::endFile() -{ - mEncodingDetector->detectAndConvert(); - - int size = mEncodingDetector->size(); - if (size) { - for (int i = 0; i < size; i++) { - const char *name; - const char *value; - mEncodingDetector->getTag(i, &name, &value); - handleStringTag(name, value); - } - } +void MediaScannerClient::endFile() { } } // namespace android diff --git a/media/libmedia/StringArray.h b/media/libmedia/StringArray.h deleted file mode 100644 index ae47085..0000000 --- a/media/libmedia/StringArray.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// Sortable array of strings. STL-ish, but STL-free. -// -#ifndef _LIBS_MEDIA_STRING_ARRAY_H -#define _LIBS_MEDIA_STRING_ARRAY_H - -#include <stdlib.h> -#include <string.h> - -namespace android { - -// -// An expanding array of strings. Add, get, sort, delete. -// -class StringArray { -public: - StringArray(); - virtual ~StringArray(); - - // - // Add a string. A copy of the string is made. - // - bool push_back(const char* str); - - // - // Delete an entry. - // - void erase(int idx); - - // - // Sort the array. - // - void sort(int (*compare)(const void*, const void*)); - - // - // Pass this to the sort routine to do an ascending alphabetical sort. - // - static int cmpAscendingAlpha(const void* pstr1, const void* pstr2); - - // - // Get the #of items in the array. - // - inline int size(void) const { return mCurrent; } - - // - // Return entry N. - // [should use operator[] here] - // - const char* getEntry(int idx) const { - return (unsigned(idx) >= unsigned(mCurrent)) ? NULL : mArray[idx]; - } - - // - // Set entry N to specified string. - // [should use operator[] here] - // - void setEntry(int idx, const char* str); - -private: - int mMax; - int mCurrent; - char** mArray; -}; - -}; // namespace android - -#endif // _LIBS_MEDIA_STRING_ARRAY_H diff --git a/media/libstagefright/Android.mk b/media/libstagefright/Android.mk index be9af5e..193f8a7 100644 --- a/media/libstagefright/Android.mk +++ b/media/libstagefright/Android.mk @@ -62,6 +62,7 @@ LOCAL_SRC_FILES:= \ avc_utils.cpp \ LOCAL_C_INCLUDES:= \ + $(TOP)/frameworks/av/include/media/ \ $(TOP)/frameworks/av/include/media/stagefright/timedtext \ $(TOP)/frameworks/native/include/media/hardware \ $(TOP)/frameworks/native/include/media/openmax \ @@ -70,6 +71,8 @@ LOCAL_C_INCLUDES:= \ $(TOP)/external/openssl/include \ $(TOP)/external/libvpx/libwebm \ $(TOP)/system/netd/include \ + $(TOP)/external/icu/icu4c/source/common \ + $(TOP)/external/icu/icu4c/source/i18n \ LOCAL_SHARED_LIBRARIES := \ libbinder \ diff --git a/media/libstagefright/StagefrightMetadataRetriever.cpp b/media/libstagefright/StagefrightMetadataRetriever.cpp index 8cc41e7..101fc8a 100644 --- a/media/libstagefright/StagefrightMetadataRetriever.cpp +++ b/media/libstagefright/StagefrightMetadataRetriever.cpp @@ -32,6 +32,7 @@ #include <media/stagefright/MetaData.h> #include <media/stagefright/OMXCodec.h> #include <media/stagefright/MediaDefs.h> +#include <CharacterEncodingDetector.h> namespace android { @@ -450,32 +451,58 @@ void StagefrightMetadataRetriever::parseMetaData() { struct Map { int from; int to; + const char *name; }; static const Map kMap[] = { - { kKeyMIMEType, METADATA_KEY_MIMETYPE }, - { kKeyCDTrackNumber, METADATA_KEY_CD_TRACK_NUMBER }, - { kKeyDiscNumber, METADATA_KEY_DISC_NUMBER }, - { kKeyAlbum, METADATA_KEY_ALBUM }, - { kKeyArtist, METADATA_KEY_ARTIST }, - { kKeyAlbumArtist, METADATA_KEY_ALBUMARTIST }, - { kKeyAuthor, METADATA_KEY_AUTHOR }, - { kKeyComposer, METADATA_KEY_COMPOSER }, - { kKeyDate, METADATA_KEY_DATE }, - { kKeyGenre, METADATA_KEY_GENRE }, - { kKeyTitle, METADATA_KEY_TITLE }, - { kKeyYear, METADATA_KEY_YEAR }, - { kKeyWriter, METADATA_KEY_WRITER }, - { kKeyCompilation, METADATA_KEY_COMPILATION }, - { kKeyLocation, METADATA_KEY_LOCATION }, + { kKeyMIMEType, METADATA_KEY_MIMETYPE, NULL }, + { kKeyCDTrackNumber, METADATA_KEY_CD_TRACK_NUMBER, "tracknumber" }, + { kKeyDiscNumber, METADATA_KEY_DISC_NUMBER, "discnumber" }, + { kKeyAlbum, METADATA_KEY_ALBUM, "album" }, + { kKeyArtist, METADATA_KEY_ARTIST, "artist" }, + { kKeyAlbumArtist, METADATA_KEY_ALBUMARTIST, "albumartist" }, + { kKeyAuthor, METADATA_KEY_AUTHOR, NULL }, + { kKeyComposer, METADATA_KEY_COMPOSER, "composer" }, + { kKeyDate, METADATA_KEY_DATE, NULL }, + { kKeyGenre, METADATA_KEY_GENRE, "genre" }, + { kKeyTitle, METADATA_KEY_TITLE, "title" }, + { kKeyYear, METADATA_KEY_YEAR, "year" }, + { kKeyWriter, METADATA_KEY_WRITER, "writer" }, + { kKeyCompilation, METADATA_KEY_COMPILATION, "compilation" }, + { kKeyLocation, METADATA_KEY_LOCATION, NULL }, }; + static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); + CharacterEncodingDetector *detector = new CharacterEncodingDetector(); + for (size_t i = 0; i < kNumMapEntries; ++i) { const char *value; if (meta->findCString(kMap[i].from, &value)) { - mMetaData.add(kMap[i].to, String8(value)); + if (kMap[i].name) { + // add to charset detector + detector->addTag(kMap[i].name, value); + } else { + // directly add to output list + mMetaData.add(kMap[i].to, String8(value)); + } + } + } + + detector->detectAndConvert(); + int size = detector->size(); + if (size) { + for (int i = 0; i < size; i++) { + const char *name; + const char *value; + detector->getTag(i, &name, &value); + for (size_t j = 0; j < kNumMapEntries; ++j) { + if (kMap[j].name && !strcmp(kMap[j].name, name)) { + mMetaData.add(kMap[j].to, String8(value)); + } + } } } + delete detector; const void *data; uint32_t type; |