summaryrefslogtreecommitdiffstats
path: root/media/libmedia/CharacterEncodingDetector.h
diff options
context:
space:
mode:
authorMarco Nelissen <marcone@google.com>2014-03-18 14:00:39 -0700
committerMarco Nelissen <marcone@google.com>2014-03-19 09:48:18 -0700
commitbfd55f243feb3f04e26ad07aae035475768ada8a (patch)
treeace7974e03aa66e5858c58e3205171d786e0882e /media/libmedia/CharacterEncodingDetector.h
parente848bd9abb3bbbd4c66f9fb9d1442f7663d7ba40 (diff)
downloadframeworks_av-bfd55f243feb3f04e26ad07aae035475768ada8a.zip
frameworks_av-bfd55f243feb3f04e26ad07aae035475768ada8a.tar.gz
frameworks_av-bfd55f243feb3f04e26ad07aae035475768ada8a.tar.bz2
Use more tags to help the ICU detector.
The detector only gave non-ascii data to ICU. In some cases that could result in very short data, for which ICU would issue a low confidence level for the actual encoding. By padding the data with additional (ascii) tags, we improve accuracy for such files. Because this can reduce accuracy in other cases, only do this when the initial confidence is low. b/13473604 Change-Id: I63d932043155c310b0e358cdf2d37787961e94b7
Diffstat (limited to 'media/libmedia/CharacterEncodingDetector.h')
-rw-r--r-- media/libmedia/CharacterEncodingDetector.h 4
1 files changed, 3 insertions, 1 deletions
diff --git a/media/libmedia/CharacterEncodingDetector.h b/media/libmedia/CharacterEncodingDetector.h
index 3655a91..7b5ed86 100644
--- a/media/libmedia/CharacterEncodingDetector.h
+++ b/media/libmedia/CharacterEncodingDetector.h
@@ -41,7 +41,9 @@ class CharacterEncodingDetector {
private:
const UCharsetMatch *getPreferred(
- const char *input, size_t len, const UCharsetMatch** ucma, size_t matches);
+ const char *input, size_t len,
+ const UCharsetMatch** ucma, size_t matches,
+ bool *goodmatch);
bool isFrequent(const uint16_t *values, uint32_t c);