summaryrefslogtreecommitdiffstats
path: root/media/libmedia
diff options
context:
space:
mode:
authorMarco Nelissen <marcone@google.com>2014-09-03 19:09:43 +0000
committerAndroid (Google) Code Review <android-gerrit@google.com>2014-09-03 19:09:43 +0000
commitd71233a846aca7035a851941c1530d04c6a65086 (patch)
treea1f528af0c52fd0d78a1843743f2930a2684f2b2 /media/libmedia
parentacdae5d7865b604acaadd3be1c45c84ca4bf3952 (diff)
parent34581f44cde67960fbac3ba1f191a2c063ea5145 (diff)
downloadframeworks_av-d71233a846aca7035a851941c1530d04c6a65086.zip
frameworks_av-d71233a846aca7035a851941c1530d04c6a65086.tar.gz
frameworks_av-d71233a846aca7035a851941c1530d04c6a65086.tar.bz2
Merge "Use CharacterEncodingDetector in metadataretriever" into lmp-dev
Diffstat (limited to 'media/libmedia')
-rw-r--r--media/libmedia/Android.mk5
-rw-r--r--media/libmedia/CharacterEncodingDetector.cpp54
-rw-r--r--media/libmedia/CharacterEncodingDetector.h63
-rw-r--r--media/libmedia/MediaScannerClient.cpp29
-rw-r--r--media/libmedia/StringArray.h83
5 files changed, 51 insertions, 183 deletions
diff --git a/media/libmedia/Android.mk b/media/libmedia/Android.mk
index 37bc418..e012116 100644
--- a/media/libmedia/Android.mk
+++ b/media/libmedia/Android.mk
@@ -76,9 +76,10 @@ LOCAL_MODULE:= libmedia
LOCAL_C_INCLUDES := \
$(TOP)/frameworks/native/include/media/openmax \
+ $(TOP)/frameworks/av/include/media/ \
$(TOP)/frameworks/av/media/libstagefright \
- external/icu/icu4c/source/common \
- external/icu/icu4c/source/i18n \
+ $(TOP)/external/icu/icu4c/source/common \
+ $(TOP)/external/icu/icu4c/source/i18n \
$(call include-path-for, audio-effects) \
$(call include-path-for, audio-utils)
diff --git a/media/libmedia/CharacterEncodingDetector.cpp b/media/libmedia/CharacterEncodingDetector.cpp
index 7d1ddfd..41994dc 100644
--- a/media/libmedia/CharacterEncodingDetector.cpp
+++ b/media/libmedia/CharacterEncodingDetector.cpp
@@ -18,7 +18,7 @@
#define LOG_TAG "CharacterEncodingDector"
#include <utils/Log.h>
-#include "CharacterEncodingDetector.h"
+#include <CharacterEncodingDetector.h>
#include "CharacterEncodingDetectorTables.h"
#include "utils/Vector.h"
@@ -118,10 +118,12 @@ void CharacterEncodingDetector::detectAndConvert() {
int32_t matches;
const UCharsetMatch** ucma = ucsdet_detectAll(csd, &matches, &status);
bool goodmatch = true;
+ int highest = 0;
const UCharsetMatch* bestCombinedMatch = getPreferred(buf, strlen(buf),
- ucma, matches, &goodmatch);
+ ucma, matches, &goodmatch, &highest);
- if (!goodmatch && strlen(buf) < 20) {
+ ALOGV("goodmatch: %s, highest: %d", goodmatch ? "true" : "false", highest);
+ if (!goodmatch && (highest < 15 || strlen(buf) < 20)) {
ALOGV("not a good match, trying with more data");
// This string might be too short for ICU to do anything useful with.
// (real world example: "Björk" in ISO-8859-1 might be detected as GB18030, because
@@ -146,9 +148,10 @@ void CharacterEncodingDetector::detectAndConvert() {
ucsdet_setText(csd, buf, strlen(buf), &status);
ucma = ucsdet_detectAll(csd, &matches, &status);
bestCombinedMatch = getPreferred(buf, strlen(buf),
- ucma, matches, &goodmatch);
- if (!goodmatch) {
+ ucma, matches, &goodmatch, &highest);
+ if (!goodmatch && highest <= 15) {
ALOGV("still not a good match after adding printable tags");
+ bestCombinedMatch = NULL;
}
} else {
ALOGV("no printable tags to add");
@@ -157,6 +160,8 @@ void CharacterEncodingDetector::detectAndConvert() {
if (bestCombinedMatch != NULL) {
combinedenc = ucsdet_getName(bestCombinedMatch, &status);
+ } else {
+ combinedenc = "ISO-8859-1";
}
}
@@ -199,10 +204,17 @@ void CharacterEncodingDetector::detectAndConvert() {
if (strcmp(enc,"UTF-8") != 0) {
// only convert if the source encoding isn't already UTF-8
ALOGV("@@@ using converter %s for %s", enc, mNames.getEntry(i));
+ status = U_ZERO_ERROR;
UConverter *conv = ucnv_open(enc, &status);
if (U_FAILURE(status)) {
- ALOGE("could not create UConverter for %s", enc);
- continue;
+ ALOGW("could not create UConverter for %s (%d), falling back to ISO-8859-1",
+ enc, status);
+ status = U_ZERO_ERROR;
+ conv = ucnv_open("ISO-8859-1", &status);
+ if (U_FAILURE(status)) {
+ ALOGW("could not create UConverter for ISO-8859-1 either");
+ continue;
+ }
}
// convert from native encoding to UTF-8
@@ -224,7 +236,16 @@ void CharacterEncodingDetector::detectAndConvert() {
} else {
// zero terminate
*target = 0;
- mValues.setEntry(i, buffer);
+ // strip trailing spaces
+ while (--target > buffer && *target == ' ') {
+ *target = 0;
+ }
+ // skip leading spaces
+ char *start = buffer;
+ while (*start == ' ') {
+ start++;
+ }
+ mValues.setEntry(i, start);
}
delete[] buffer;
@@ -261,7 +282,7 @@ void CharacterEncodingDetector::detectAndConvert() {
const UCharsetMatch *CharacterEncodingDetector::getPreferred(
const char *input, size_t len,
const UCharsetMatch** ucma, size_t nummatches,
- bool *goodmatch) {
+ bool *goodmatch, int *highestmatch) {
*goodmatch = false;
Vector<const UCharsetMatch*> matches;
@@ -316,11 +337,17 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
}
ALOGV("%zu: %s %d", i, encname, confidence);
+ status = U_ZERO_ERROR;
UConverter *conv = ucnv_open(encname, &status);
+ int demerit = 0;
+ if (U_FAILURE(status)) {
+ ALOGV("failed to open %s: %d", encname, status);
+ confidence = 0;
+ demerit += 1000;
+ }
const char *source = input;
const char *sourceLimit = input + len;
status = U_ZERO_ERROR;
- int demerit = 0;
int frequentchars = 0;
int totalchars = 0;
while (true) {
@@ -337,7 +364,8 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
if (c < 0x20 || (c >= 0x7f && c <= 0x009f)) {
ALOGV("control character %x", c);
demerit += 100;
- } else if ((c >= 0xa0 && c <= 0xbe) // symbols, superscripts
+ } else if ((c == 0xa0) // no-break space
+ || (c >= 0xa2 && c <= 0xbe) // symbols, superscripts
|| (c == 0xd7) || (c == 0xf7) // multiplication and division signs
|| (c >= 0x2000 && c <= 0x209f)) { // punctuation, superscripts
ALOGV("unlikely character %x", c);
@@ -408,10 +436,14 @@ const UCharsetMatch *CharacterEncodingDetector::getPreferred(
} else {
ALOGV("runner up: '%s' w/ %d confidence",
ucsdet_getName(matches[runnerupidx], &status), runnerup);
+ if (runnerup < 0) {
+ runnerup = 0;
+ }
if ((highest - runnerup) > 15) {
*goodmatch = true;
}
}
+ *highestmatch = highest;
return matches[highestidx];
}
diff --git a/media/libmedia/CharacterEncodingDetector.h b/media/libmedia/CharacterEncodingDetector.h
deleted file mode 100644
index 7b5ed86..0000000
--- a/media/libmedia/CharacterEncodingDetector.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _CHARACTER_ENCODING_DETECTOR_H
-#define _CHARACTER_ENCODING_DETECTOR_H
-
-#include <media/mediascanner.h>
-
-#include "StringArray.h"
-
-#include "unicode/ucnv.h"
-#include "unicode/ucsdet.h"
-#include "unicode/ustring.h"
-
-namespace android {
-
-class CharacterEncodingDetector {
-
- public:
- CharacterEncodingDetector();
- ~CharacterEncodingDetector();
-
- void addTag(const char *name, const char *value);
- size_t size();
-
- void detectAndConvert();
- status_t getTag(int index, const char **name, const char**value);
-
- private:
- const UCharsetMatch *getPreferred(
- const char *input, size_t len,
- const UCharsetMatch** ucma, size_t matches,
- bool *goodmatch);
-
- bool isFrequent(const uint16_t *values, uint32_t c);
-
- // cached name and value strings, for native encoding support.
- // TODO: replace these with byte blob arrays that don't require the data to be
- // singlenullbyte-terminated
- StringArray mNames;
- StringArray mValues;
-
- UConverter* mUtf8Conv;
-};
-
-
-
-}; // namespace android
-
-#endif
diff --git a/media/libmedia/MediaScannerClient.cpp b/media/libmedia/MediaScannerClient.cpp
index 1661f04..9f803cb 100644
--- a/media/libmedia/MediaScannerClient.cpp
+++ b/media/libmedia/MediaScannerClient.cpp
@@ -25,14 +25,10 @@
namespace android {
-MediaScannerClient::MediaScannerClient()
- : mEncodingDetector(NULL)
-{
+MediaScannerClient::MediaScannerClient() {
}
-MediaScannerClient::~MediaScannerClient()
-{
- delete mEncodingDetector;
+MediaScannerClient::~MediaScannerClient() {
}
void MediaScannerClient::setLocale(const char* locale)
@@ -40,31 +36,16 @@ void MediaScannerClient::setLocale(const char* locale)
mLocale = locale; // not currently used
}
-void MediaScannerClient::beginFile()
-{
- delete mEncodingDetector;
- mEncodingDetector = new CharacterEncodingDetector();
+void MediaScannerClient::beginFile() {
}
status_t MediaScannerClient::addStringTag(const char* name, const char* value)
{
- mEncodingDetector->addTag(name, value);
+ handleStringTag(name, value);
return OK;
}
-void MediaScannerClient::endFile()
-{
- mEncodingDetector->detectAndConvert();
-
- int size = mEncodingDetector->size();
- if (size) {
- for (int i = 0; i < size; i++) {
- const char *name;
- const char *value;
- mEncodingDetector->getTag(i, &name, &value);
- handleStringTag(name, value);
- }
- }
+void MediaScannerClient::endFile() {
}
} // namespace android
diff --git a/media/libmedia/StringArray.h b/media/libmedia/StringArray.h
deleted file mode 100644
index ae47085..0000000
--- a/media/libmedia/StringArray.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-//
-// Sortable array of strings. STL-ish, but STL-free.
-//
-#ifndef _LIBS_MEDIA_STRING_ARRAY_H
-#define _LIBS_MEDIA_STRING_ARRAY_H
-
-#include <stdlib.h>
-#include <string.h>
-
-namespace android {
-
-//
-// An expanding array of strings. Add, get, sort, delete.
-//
-class StringArray {
-public:
- StringArray();
- virtual ~StringArray();
-
- //
- // Add a string. A copy of the string is made.
- //
- bool push_back(const char* str);
-
- //
- // Delete an entry.
- //
- void erase(int idx);
-
- //
- // Sort the array.
- //
- void sort(int (*compare)(const void*, const void*));
-
- //
- // Pass this to the sort routine to do an ascending alphabetical sort.
- //
- static int cmpAscendingAlpha(const void* pstr1, const void* pstr2);
-
- //
- // Get the #of items in the array.
- //
- inline int size(void) const { return mCurrent; }
-
- //
- // Return entry N.
- // [should use operator[] here]
- //
- const char* getEntry(int idx) const {
- return (unsigned(idx) >= unsigned(mCurrent)) ? NULL : mArray[idx];
- }
-
- //
- // Set entry N to specified string.
- // [should use operator[] here]
- //
- void setEntry(int idx, const char* str);
-
-private:
- int mMax;
- int mCurrent;
- char** mArray;
-};
-
-}; // namespace android
-
-#endif // _LIBS_MEDIA_STRING_ARRAY_H