/* * Copyright (C) 2009 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //#define LOG_NDEBUG 0 #define LOG_TAG "MPEG4Extractor" #include #include "include/MPEG4Extractor.h" #include "include/SampleTable.h" #include "include/ESDS.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef QCOM_HARDWARE #include #include #endif namespace android { class MPEG4Source : public MediaSource { public: // Caller retains ownership of both "dataSource" and "sampleTable". MPEG4Source(const sp &format, const sp &dataSource, int32_t timeScale, const sp &sampleTable); virtual status_t start(MetaData *params = NULL); virtual status_t stop(); virtual sp getFormat(); virtual status_t read( MediaBuffer **buffer, const ReadOptions *options = NULL); protected: virtual ~MPEG4Source(); private: Mutex mLock; sp mFormat; sp mDataSource; int32_t mTimescale; sp mSampleTable; uint32_t mCurrentSampleIndex; bool mIsAVC; size_t mNALLengthSize; bool mStarted; MediaBufferGroup *mGroup; MediaBuffer *mBuffer; bool mWantsNALFragments; uint8_t *mSrcBuffer; size_t parseNALSize(const uint8_t *data) const; MPEG4Source(const MPEG4Source &); MPEG4Source &operator=(const MPEG4Source &); }; // This custom data source wraps an existing one and satisfies requests // falling entirely within a cached range from the cache while forwarding // all remaining requests to the wrapped datasource. // This is used to cache the full sampletable metadata for a single track, // possibly wrapping multiple times to cover all tracks, i.e. // Each MPEG4DataSource caches the sampletable metadata for a single track. struct MPEG4DataSource : public DataSource { MPEG4DataSource(const sp &source); virtual status_t initCheck() const; virtual ssize_t readAt(off64_t offset, void *data, size_t size); virtual status_t getSize(off64_t *size); virtual uint32_t flags(); status_t setCachedRange(off64_t offset, size_t size); protected: virtual ~MPEG4DataSource(); private: Mutex mLock; sp mSource; off64_t mCachedOffset; size_t mCachedSize; uint8_t *mCache; void clearCache(); MPEG4DataSource(const MPEG4DataSource &); MPEG4DataSource &operator=(const MPEG4DataSource &); }; MPEG4DataSource::MPEG4DataSource(const sp &source) : mSource(source), mCachedOffset(0), mCachedSize(0), mCache(NULL) { } MPEG4DataSource::~MPEG4DataSource() { clearCache(); } void MPEG4DataSource::clearCache() { if (mCache) { free(mCache); mCache = NULL; } mCachedOffset = 0; mCachedSize = 0; } status_t MPEG4DataSource::initCheck() const { return mSource->initCheck(); } ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { Mutex::Autolock autoLock(mLock); if (offset >= mCachedOffset && offset + size <= mCachedOffset + mCachedSize) { memcpy(data, &mCache[offset - mCachedOffset], size); return size; } return mSource->readAt(offset, data, size); } status_t MPEG4DataSource::getSize(off64_t *size) { return mSource->getSize(size); } uint32_t MPEG4DataSource::flags() { return mSource->flags(); } status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { Mutex::Autolock autoLock(mLock); clearCache(); mCache = (uint8_t *)malloc(size); if (mCache == NULL) { return -ENOMEM; } mCachedOffset = offset; mCachedSize = size; ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); if (err < (ssize_t)size) { clearCache(); return ERROR_IO; } return OK; } //////////////////////////////////////////////////////////////////////////////// static void hexdump(const void *_data, size_t size) { const uint8_t *data = (const uint8_t *)_data; size_t offset = 0; while (offset < size) { printf("0x%04x ", offset); size_t n = size - offset; if (n > 16) { n = 16; } for (size_t i = 0; i < 16; ++i) { if (i == 8) { printf(" "); } if (offset + i < size) { printf("%02x ", data[offset + i]); } else { printf(" "); } } printf(" "); for (size_t i = 0; i < n; ++i) { if (isprint(data[offset + i])) { printf("%c", data[offset + i]); } else { printf("."); } } printf("\n"); offset += 16; } } static const char *FourCC2MIME(uint32_t fourcc) { switch (fourcc) { case FOURCC('m', 'p', '4', 'a'): return MEDIA_MIMETYPE_AUDIO_AAC; case FOURCC('.', 'm', 'p', '3'): return MEDIA_MIMETYPE_AUDIO_MPEG; case FOURCC('s', 'a', 'm', 'r'): return MEDIA_MIMETYPE_AUDIO_AMR_NB; case FOURCC('s', 'a', 'w', 'b'): return MEDIA_MIMETYPE_AUDIO_AMR_WB; case FOURCC('m', 'p', '4', 'v'): return MEDIA_MIMETYPE_VIDEO_MPEG4; case FOURCC('s', '2', '6', '3'): case FOURCC('h', '2', '6', '3'): case FOURCC('H', '2', '6', '3'): return MEDIA_MIMETYPE_VIDEO_H263; case FOURCC('a', 'v', 'c', '1'): return MEDIA_MIMETYPE_VIDEO_AVC; #ifdef QCOM_HARDWARE case FOURCC('s', 'q', 'c', 'p'): return MEDIA_MIMETYPE_AUDIO_QCELP; case FOURCC('s', 'e', 'v', 'c'): return MEDIA_MIMETYPE_AUDIO_EVRC; case FOURCC('d', 't', 's', 'c'): case FOURCC('d', 't', 's', 'h'): case FOURCC('d', 't', 's', 'l'): return MEDIA_MIMETYPE_AUDIO_DTS; case FOURCC('d', 't', 's', 'e'): return MEDIA_MIMETYPE_AUDIO_DTS_LBR; case FOURCC('a', 'c', '-', '3'): return MEDIA_MIMETYPE_AUDIO_AC3; case FOURCC('e', 'c', '-', '3'): return MEDIA_MIMETYPE_AUDIO_EAC3; #endif default: CHECK(!"should not be here."); return NULL; } } MPEG4Extractor::MPEG4Extractor(const sp &source) : mDataSource(source), mInitCheck(NO_INIT), mHasVideo(false), mFirstTrack(NULL), mLastTrack(NULL), mFileMetaData(new MetaData), mFirstSINF(NULL), mIsDrm(false) { } MPEG4Extractor::~MPEG4Extractor() { Track *track = mFirstTrack; while (track) { Track *next = track->next; delete track; track = next; } mFirstTrack = mLastTrack = NULL; SINF *sinf = mFirstSINF; while (sinf) { SINF *next = sinf->next; delete sinf->IPMPData; delete sinf; sinf = next; } mFirstSINF = NULL; } sp MPEG4Extractor::getMetaData() { status_t err; if ((err = readMetaData()) != OK) { return new MetaData; } return mFileMetaData; } size_t MPEG4Extractor::countTracks() { status_t err; if ((err = readMetaData()) != OK) { return 0; } size_t n = 0; Track *track = mFirstTrack; while (track) { ++n; track = track->next; } return n; } sp MPEG4Extractor::getTrackMetaData( size_t index, uint32_t flags) { status_t err; if ((err = readMetaData()) != OK) { return NULL; } Track *track = mFirstTrack; while (index > 0) { if (track == NULL) { return NULL; } track = track->next; --index; } if (track == NULL) { return NULL; } if ((flags & kIncludeExtensiveMetaData) && !track->includes_expensive_metadata) { track->includes_expensive_metadata = true; const char *mime; CHECK(track->meta->findCString(kKeyMIMEType, &mime)); if (!strncasecmp("video/", mime, 6)) { uint32_t sampleIndex; uint32_t sampleTime; if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK && track->sampleTable->getMetaDataForSample( sampleIndex, NULL /* offset */, NULL /* size */, &sampleTime) == OK) { track->meta->setInt64( kKeyThumbnailTime, ((int64_t)sampleTime * 1000000) / track->timescale); } } } return track->meta; } status_t MPEG4Extractor::readMetaData() { if (mInitCheck != NO_INIT) { return mInitCheck; } off64_t offset = 0; status_t err; while ((err = parseChunk(&offset, 0)) == OK) { } if (mInitCheck == OK) { if (mHasVideo) { mFileMetaData->setCString( kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); } else { mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); } mInitCheck = OK; } else { mInitCheck = err; } CHECK_NE(err, (status_t)NO_INIT); return mInitCheck; } char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { if (mFirstSINF == NULL) { return NULL; } SINF *sinf = mFirstSINF; while (sinf && (trackID != sinf->trackID)) { sinf = sinf->next; } if (sinf == NULL) { return NULL; } *len = sinf->len; return sinf->IPMPData; } // Reads an encoded integer 7 bits at a time until it encounters the high bit clear. static int32_t readSize(off64_t offset, const sp DataSource, uint8_t *numOfBytes) { uint32_t size = 0; uint8_t data; bool moreData = true; *numOfBytes = 0; while (moreData) { if (DataSource->readAt(offset, &data, 1) < 1) { return -1; } offset ++; moreData = (data >= 128) ? true : false; size = (size << 7) | (data & 0x7f); // Take last 7 bits (*numOfBytes) ++; } return size; } status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) { uint8_t updateIdTag; if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { return ERROR_IO; } data_offset ++; if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { return ERROR_MALFORMED; } uint8_t numOfBytes; int32_t size = readSize(data_offset, mDataSource, &numOfBytes); if (size < 0) { return ERROR_IO; } int32_t classSize = size; data_offset += numOfBytes; while(size >= 11 ) { uint8_t descriptorTag; if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { return ERROR_IO; } data_offset ++; if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { return ERROR_MALFORMED; } uint8_t buffer[8]; //ObjectDescriptorID and ObjectDescriptor url flag if (mDataSource->readAt(data_offset, buffer, 2) < 2) { return ERROR_IO; } data_offset += 2; if ((buffer[1] >> 5) & 0x0001) { //url flag is set return ERROR_MALFORMED; } if (mDataSource->readAt(data_offset, buffer, 8) < 8) { return ERROR_IO; } data_offset += 8; if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { return ERROR_MALFORMED; } SINF *sinf = new SINF; sinf->trackID = U16_AT(&buffer[3]); sinf->IPMPDescriptorID = buffer[7]; sinf->next = mFirstSINF; mFirstSINF = sinf; size -= (8 + 2 + 1); } if (size != 0) { return ERROR_MALFORMED; } if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { return ERROR_IO; } data_offset ++; if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { return ERROR_MALFORMED; } size = readSize(data_offset, mDataSource, &numOfBytes); if (size < 0) { return ERROR_IO; } classSize = size; data_offset += numOfBytes; while (size > 0) { uint8_t tag; int32_t dataLen; if (mDataSource->readAt(data_offset, &tag, 1) < 1) { return ERROR_IO; } data_offset ++; if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { uint8_t id; dataLen = readSize(data_offset, mDataSource, &numOfBytes); if (dataLen < 0) { return ERROR_IO; } else if (dataLen < 4) { return ERROR_MALFORMED; } data_offset += numOfBytes; if (mDataSource->readAt(data_offset, &id, 1) < 1) { return ERROR_IO; } data_offset ++; SINF *sinf = mFirstSINF; while (sinf && (sinf->IPMPDescriptorID != id)) { sinf = sinf->next; } if (sinf == NULL) { return ERROR_MALFORMED; } sinf->len = dataLen - 3; sinf->IPMPData = new char[sinf->len]; if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) { return ERROR_IO; } data_offset += sinf->len; size -= (dataLen + numOfBytes + 1); } } if (size != 0) { return ERROR_MALFORMED; } return UNKNOWN_ERROR; // Return a dummy error. } static void MakeFourCCString(uint32_t x, char *s) { s[0] = x >> 24; s[1] = (x >> 16) & 0xff; s[2] = (x >> 8) & 0xff; s[3] = x & 0xff; s[4] = '\0'; } struct PathAdder { PathAdder(Vector *path, uint32_t chunkType) : mPath(path) { mPath->push(chunkType); } ~PathAdder() { mPath->pop(); } private: Vector *mPath; PathAdder(const PathAdder &); PathAdder &operator=(const PathAdder &); }; static bool underMetaDataPath(const Vector &path) { return path.size() >= 5 && path[0] == FOURCC('m', 'o', 'o', 'v') && path[1] == FOURCC('u', 'd', 't', 'a') && path[2] == FOURCC('m', 'e', 't', 'a') && path[3] == FOURCC('i', 'l', 's', 't'); } // Given a time in seconds since Jan 1 1904, produce a human-readable string. static void convertTimeToDate(int64_t time_1904, String8 *s) { time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); char tmp[32]; strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); s->setTo(tmp); } status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { ALOGV("entering parseChunk %lld/%d", *offset, depth); uint32_t hdr[2]; if (mDataSource->readAt(*offset, hdr, 8) < 8) { return ERROR_IO; } uint64_t chunk_size = ntohl(hdr[0]); uint32_t chunk_type = ntohl(hdr[1]); off64_t data_offset = *offset + 8; if (chunk_size == 1) { if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { return ERROR_IO; } chunk_size = ntoh64(chunk_size); data_offset += 8; if (chunk_size < 16) { // The smallest valid chunk is 16 bytes long in this case. return ERROR_MALFORMED; } } else if (chunk_size < 8) { // The smallest valid chunk is 8 bytes long. return ERROR_MALFORMED; } char chunk[5]; MakeFourCCString(chunk_type, chunk); ALOGV("chunk: %s @ %lld", chunk, *offset); #if 0 static const char kWhitespace[] = " "; const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size); char buffer[256]; size_t n = chunk_size; if (n > sizeof(buffer)) { n = sizeof(buffer); } if (mDataSource->readAt(*offset, buffer, n) < (ssize_t)n) { return ERROR_IO; } hexdump(buffer, n); #endif PathAdder autoAdder(&mPath, chunk_type); off64_t chunk_data_size = *offset + chunk_size - data_offset; if (chunk_type != FOURCC('c', 'p', 'r', 't') && chunk_type != FOURCC('c', 'o', 'v', 'r') && mPath.size() == 5 && underMetaDataPath(mPath)) { off64_t stop_offset = *offset + chunk_size; *offset = data_offset; while (*offset < stop_offset) { status_t err = parseChunk(offset, depth + 1); if (err != OK) { return err; } } if (*offset != stop_offset) { return ERROR_MALFORMED; } return OK; } switch(chunk_type) { case FOURCC('m', 'o', 'o', 'v'): case FOURCC('t', 'r', 'a', 'k'): case FOURCC('m', 'd', 'i', 'a'): case FOURCC('m', 'i', 'n', 'f'): case FOURCC('d', 'i', 'n', 'f'): case FOURCC('s', 't', 'b', 'l'): case FOURCC('m', 'v', 'e', 'x'): case FOURCC('m', 'o', 'o', 'f'): case FOURCC('t', 'r', 'a', 'f'): case FOURCC('m', 'f', 'r', 'a'): case FOURCC('u', 'd', 't', 'a'): case FOURCC('i', 'l', 's', 't'): { if (chunk_type == FOURCC('s', 't', 'b', 'l')) { ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size); if (mDataSource->flags() & (DataSource::kWantsPrefetching | DataSource::kIsCachingDataSource)) { sp cachedSource = new MPEG4DataSource(mDataSource); if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { mDataSource = cachedSource; } } if (!mLastTrack) return ERROR_MALFORMED; mLastTrack->sampleTable = new SampleTable(mDataSource); } bool isTrack = false; if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { isTrack = true; Track *track = new Track; track->next = NULL; if (mLastTrack) { mLastTrack->next = track; } else { mFirstTrack = track; } mLastTrack = track; track->meta = new MetaData; track->includes_expensive_metadata = false; track->skipTrack = false; track->timescale = 0; track->meta->setCString(kKeyMIMEType, "application/octet-stream"); } off64_t stop_offset = *offset + chunk_size; *offset = data_offset; while (*offset < stop_offset) { status_t err = parseChunk(offset, depth + 1); if (err != OK) { return err; } } if (*offset != stop_offset) { return ERROR_MALFORMED; } if (isTrack) { if (mLastTrack->skipTrack) { Track *cur = mFirstTrack; if (cur == mLastTrack) { delete cur; mFirstTrack = mLastTrack = NULL; } else { while (cur && cur->next != mLastTrack) { cur = cur->next; } cur->next = NULL; delete mLastTrack; mLastTrack = cur; } return OK; } status_t err = verifyTrack(mLastTrack); if (err != OK) { return err; } } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { mInitCheck = OK; if (!mIsDrm) { return UNKNOWN_ERROR; // Return a dummy error. } else { return OK; } } break; } case FOURCC('t', 'k', 'h', 'd'): { status_t err; if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { return err; } *offset += chunk_size; break; } case FOURCC('m', 'd', 'h', 'd'): { if (chunk_data_size < 4) { return ERROR_MALFORMED; } uint8_t version; if (mDataSource->readAt( data_offset, &version, sizeof(version)) < (ssize_t)sizeof(version)) { return ERROR_IO; } off64_t timescale_offset; if (version == 1) { timescale_offset = data_offset + 4 + 16; } else if (version == 0) { timescale_offset = data_offset + 4 + 8; } else { return ERROR_IO; } uint32_t timescale; if (mDataSource->readAt( timescale_offset, ×cale, sizeof(timescale)) < (ssize_t)sizeof(timescale)) { return ERROR_IO; } mLastTrack->timescale = ntohl(timescale); int64_t duration; if (version == 1) { if (mDataSource->readAt( timescale_offset + 4, &duration, sizeof(duration)) < (ssize_t)sizeof(duration)) { return ERROR_IO; } duration = ntoh64(duration); } else { int32_t duration32; if (mDataSource->readAt( timescale_offset + 4, &duration32, sizeof(duration32)) < (ssize_t)sizeof(duration32)) { return ERROR_IO; } duration = ntohl(duration32); } mLastTrack->meta->setInt64( kKeyDuration, (duration * 1000000) / mLastTrack->timescale); uint8_t lang[2]; off64_t lang_offset; if (version == 1) { lang_offset = timescale_offset + 4 + 8; } else if (version == 0) { lang_offset = timescale_offset + 4 + 4; } else { return ERROR_IO; } if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) < (ssize_t)sizeof(lang)) { return ERROR_IO; } // To get the ISO-639-2/T three character language code // 1 bit pad followed by 3 5-bits characters. Each character // is packed as the difference between its ASCII value and 0x60. char lang_code[4]; lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; lang_code[2] = (lang[1] & 0x1f) + 0x60; lang_code[3] = '\0'; mLastTrack->meta->setCString( kKeyMediaLanguage, lang_code); *offset += chunk_size; break; } case FOURCC('s', 't', 's', 'd'): { if (chunk_data_size < 8) { return ERROR_MALFORMED; } uint8_t buffer[8]; if (chunk_data_size < (off64_t)sizeof(buffer)) { return ERROR_MALFORMED; } if (mDataSource->readAt( data_offset, buffer, 8) < 8) { return ERROR_IO; } if (U32_AT(buffer) != 0) { // Should be version 0, flags 0. return ERROR_MALFORMED; } uint32_t entry_count = U32_AT(&buffer[4]); if (entry_count > 1) { // For 3GPP timed text, there could be multiple tx3g boxes contain // multiple text display formats. These formats will be used to // display the timed text. const char *mime; if (!mLastTrack) return ERROR_MALFORMED; CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP)) { // For now we only support a single type of media per track. mLastTrack->skipTrack = true; *offset += chunk_size; break; } } off64_t stop_offset = *offset + chunk_size; *offset = data_offset + 8; for (uint32_t i = 0; i < entry_count; ++i) { status_t err = parseChunk(offset, depth + 1); if (err != OK) { return err; } } if (*offset != stop_offset) { return ERROR_MALFORMED; } break; } case FOURCC('m', 'p', '4', 'a'): case FOURCC('s', 'a', 'm', 'r'): case FOURCC('s', 'a', 'w', 'b'): #ifdef QCOM_HARDWARE case FOURCC('.', 'm', 'p', '3'): case FOURCC('s', 'e', 'v', 'c'): case FOURCC('s', 'q', 'c', 'p'): case FOURCC('d', 't', 's', 'c'): case FOURCC('d', 't', 's', 'h'): case FOURCC('d', 't', 's', 'l'): case FOURCC('d', 't', 's', 'e'): case FOURCC('a', 'c', '-', '3'): case FOURCC('e', 'c', '-', '3'): #endif { uint8_t buffer[8 + 20]; if (chunk_data_size < (ssize_t)sizeof(buffer)) { // Basic AudioSampleEntry size. return ERROR_MALFORMED; } if (mDataSource->readAt( data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { return ERROR_IO; } uint16_t data_ref_index = U16_AT(&buffer[6]); uint16_t num_channels = U16_AT(&buffer[16]); uint16_t sample_size = U16_AT(&buffer[18]); uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; if (!mLastTrack) return ERROR_MALFORMED; if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(chunk_type))) { // AMR NB audio is always mono, 8kHz num_channels = 1; sample_rate = 8000; } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(chunk_type))) { // AMR WB audio is always mono, 16kHz num_channels = 1; sample_rate = 16000; } #if 0 printf("*** coding='%s' %d channels, size %d, rate %d\n", chunk, num_channels, sample_size, sample_rate); #endif mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); off64_t stop_offset = *offset + chunk_size; #ifdef QCOM_HARDWARE if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_MPEG, FourCC2MIME(chunk_type)) || !strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(chunk_type))) { // ESD is not required in mp3 // amr wb with damr atom corrupted can cause the clip to not play *offset = stop_offset; } else #endif *offset = data_offset + sizeof(buffer); while (*offset < stop_offset) { status_t err = parseChunk(offset, depth + 1); if (err != OK) { return err; } } if (*offset != stop_offset) { return ERROR_MALFORMED; } break; } case FOURCC('m', 'p', '4', 'v'): case FOURCC('s', '2', '6', '3'): case FOURCC('H', '2', '6', '3'): case FOURCC('h', '2', '6', '3'): case FOURCC('a', 'v', 'c', '1'): { mHasVideo = true; uint8_t buffer[78]; if (chunk_data_size < (ssize_t)sizeof(buffer)) { // Basic VideoSampleEntry size. return ERROR_MALFORMED; } if (mDataSource->readAt( data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { return ERROR_IO; } uint16_t data_ref_index = U16_AT(&buffer[6]); uint16_t width = U16_AT(&buffer[6 + 18]); uint16_t height = U16_AT(&buffer[6 + 20]); // The video sample is not stand-compliant if it has invalid dimension. // Use some default width and height value, and // let the decoder figure out the actual width and height (and thus // be prepared for INFO_FOMRAT_CHANGED event). if (width == 0) width = 352; if (height == 0) height = 288; // printf("*** coding='%s' width=%d height=%d\n", // chunk, width, height); if (!mLastTrack) return ERROR_MALFORMED; mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); mLastTrack->meta->setInt32(kKeyWidth, width); mLastTrack->meta->setInt32(kKeyHeight, height); off64_t stop_offset = *offset + chunk_size; *offset = data_offset + sizeof(buffer); while (*offset < stop_offset) { status_t err = parseChunk(offset, depth + 1); if (err != OK) { return err; } } if (*offset != stop_offset) { return ERROR_MALFORMED; } break; } case FOURCC('s', 't', 'c', 'o'): case FOURCC('c', 'o', '6', '4'): { if (!mLastTrack || !mLastTrack->sampleTable.get()) return ERROR_MALFORMED; status_t err = mLastTrack->sampleTable->setChunkOffsetParams( chunk_type, data_offset, chunk_data_size); if (err != OK) { return err; } *offset += chunk_size; break; } case FOURCC('s', 't', 's', 'c'): { if (!mLastTrack || !mLastTrack->sampleTable.get()) return ERROR_MALFORMED; status_t err = mLastTrack->sampleTable->setSampleToChunkParams( data_offset, chunk_data_size); if (err != OK) { return err; } *offset += chunk_size; break; } case FOURCC('s', 't', 's', 'z'): case FOURCC('s', 't', 'z', '2'): { if (!mLastTrack || !mLastTrack->sampleTable.get()) return ERROR_MALFORMED; status_t err = mLastTrack->sampleTable->setSampleSizeParams( chunk_type, data_offset, chunk_data_size); if (err != OK) { return err; } size_t max_size; err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); if (err != OK) { return err; } // Assume that a given buffer only contains at most 10 fragments, // each fragment originally prefixed with a 2 byte length will // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, // and thus will grow by 2 bytes per fragment. mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); *offset += chunk_size; // Calculate average frame rate. const char *mime; CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); if (!strncasecmp("video/", mime, 6)) { size_t nSamples = mLastTrack->sampleTable->countSamples(); int64_t durationUs; if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { if (durationUs > 0) { int32_t frameRate = (nSamples * 1000000LL + (durationUs >> 1)) / durationUs; mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); } } } break; } case FOURCC('s', 't', 't', 's'): { if (!mLastTrack || !mLastTrack->sampleTable.get()) return ERROR_MALFORMED; status_t err = mLastTrack->sampleTable->setTimeToSampleParams( data_offset, chunk_data_size); if (err != OK) { return err; } *offset += chunk_size; break; } case FOURCC('c', 't', 't', 's'): { if (!mLastTrack || !mLastTrack->sampleTable.get()) return ERROR_MALFORMED; status_t err = mLastTrack->sampleTable->setCompositionTimeToSampleParams( data_offset, chunk_data_size); if (err != OK) { return err; } *offset += chunk_size; break; } case FOURCC('s', 't', 's', 's'): { if (!mLastTrack || !mLastTrack->sampleTable.get()) return ERROR_MALFORMED; status_t err = mLastTrack->sampleTable->setSyncSampleParams( data_offset, chunk_data_size); if (err != OK) { return err; } *offset += chunk_size; break; } // @xyz case FOURCC('\xA9', 'x', 'y', 'z'): { // Best case the total data length inside "@xyz" box // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", // where "\x00\x04" is the text string length with value = 4, // "\0x15\xc7" is the language code = en, and "0+0" is a // location (string) value with longitude = 0 and latitude = 0. if (chunk_data_size < 8) { return ERROR_MALFORMED; } // Worst case the location string length would be 18, // for instance +90.0000-180.0000, without the trailing "/" and // the string length + language code. char buffer[18]; // Substracting 5 from the data size is because the text string length + // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. off64_t location_length = chunk_data_size - 5; if (location_length >= (off64_t) sizeof(buffer)) { return ERROR_MALFORMED; } if (mDataSource->readAt( data_offset + 4, buffer, location_length) < location_length) { return ERROR_IO; } buffer[location_length] = '\0'; mFileMetaData->setCString(kKeyLocation, buffer); *offset += chunk_size; break; } case FOURCC('e', 's', 'd', 's'): { if (chunk_data_size < 4) { return ERROR_MALFORMED; } uint8_t buffer[256]; if (chunk_data_size > (off64_t)sizeof(buffer)) { return ERROR_BUFFER_TOO_SMALL; } if (mDataSource->readAt( data_offset, buffer, chunk_data_size) < chunk_data_size) { return ERROR_IO; } if (U32_AT(buffer) != 0) { // Should be version 0, flags 0. return ERROR_MALFORMED; } mLastTrack->meta->setData( kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); if (mPath.size() >= 2 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { // Information from the ESDS must be relied on for proper // setup of sample rate and channel count for MPEG4 Audio. // The generic header appears to only contain generic // information... status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( &buffer[4], chunk_data_size - 4); if (err != OK) { return err; } } *offset += chunk_size; break; } #ifdef QCOM_HARDWARE case FOURCC('d', 'd', 't', 's'): case FOURCC('d', 'a', 'c', '3'): case FOURCC('d', 'e', 'c', '3'): { //no information need to be passed here, just log and end ALOGV("ddts/dac3/dec3 pass from mpeg4 extractor"); *offset += chunk_size; break; } #endif case FOURCC('a', 'v', 'c', 'C'): { char buffer[256]; if (chunk_data_size > (off64_t)sizeof(buffer)) { return ERROR_BUFFER_TOO_SMALL; } if (mDataSource->readAt( data_offset, buffer, chunk_data_size) < chunk_data_size) { return ERROR_IO; } if (!mLastTrack) return ERROR_MALFORMED; mLastTrack->meta->setData( kKeyAVCC, kTypeAVCC, buffer, chunk_data_size); *offset += chunk_size; break; } case FOURCC('d', '2', '6', '3'): { /* * d263 contains a fixed 7 bytes part: * vendor - 4 bytes * version - 1 byte * level - 1 byte * profile - 1 byte * optionally, "d263" box itself may contain a 16-byte * bit rate box (bitr) * average bit rate - 4 bytes * max bit rate - 4 bytes */ char buffer[23]; if (chunk_data_size != 7 && chunk_data_size != 23) { ALOGE("Incorrect D263 box size %lld", chunk_data_size); return ERROR_MALFORMED; } if (mDataSource->readAt( data_offset, buffer, chunk_data_size) < chunk_data_size) { return ERROR_IO; } if (!mLastTrack) return ERROR_MALFORMED; mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); *offset += chunk_size; break; } case FOURCC('m', 'e', 't', 'a'): { uint8_t buffer[4]; if (chunk_data_size < (off64_t)sizeof(buffer)) { return ERROR_MALFORMED; } if (mDataSource->readAt( data_offset, buffer, 4) < 4) { return ERROR_IO; } if (U32_AT(buffer) != 0) { // Should be version 0, flags 0. // If it's not, let's assume this is one of those // apparently malformed chunks that don't have flags // and completely different semantics than what's // in the MPEG4 specs and skip it. *offset += chunk_size; return OK; } off64_t stop_offset = *offset + chunk_size; *offset = data_offset + sizeof(buffer); while (*offset < stop_offset) { status_t err = parseChunk(offset, depth + 1); if (err != OK) { return err; } } if (*offset != stop_offset) { return ERROR_MALFORMED; } break; } case FOURCC('m', 'e', 'a', 'n'): case FOURCC('n', 'a', 'm', 'e'): case FOURCC('d', 'a', 't', 'a'): { if (mPath.size() == 6 && underMetaDataPath(mPath)) { status_t err = parseMetaData(data_offset, chunk_data_size); if (err != OK) { return err; } } *offset += chunk_size; break; } case FOURCC('m', 'v', 'h', 'd'): { if (chunk_data_size < 12) { return ERROR_MALFORMED; } uint8_t header[12]; if (mDataSource->readAt( data_offset, header, sizeof(header)) < (ssize_t)sizeof(header)) { return ERROR_IO; } int64_t creationTime; if (header[0] == 1) { creationTime = U64_AT(&header[4]); } else if (header[0] != 0) { return ERROR_MALFORMED; } else { creationTime = U32_AT(&header[4]); } String8 s; convertTimeToDate(creationTime, &s); mFileMetaData->setCString(kKeyDate, s.string()); *offset += chunk_size; break; } case FOURCC('m', 'd', 'a', 't'): { if (!mIsDrm) { *offset += chunk_size; break; } if (chunk_size < 8) { return ERROR_MALFORMED; } return parseDrmSINF(offset, data_offset); } case FOURCC('h', 'd', 'l', 'r'): { uint32_t buffer; if (mDataSource->readAt( data_offset + 8, &buffer, 4) < 4) { return ERROR_IO; } if (!mLastTrack) return ERROR_MALFORMED; uint32_t type = ntohl(buffer); // For the 3GPP file format, the handler-type within the 'hdlr' box // shall be 'text'. We also want to support 'sbtl' handler type // for a practical reason as various MPEG4 containers use it. if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); } *offset += chunk_size; break; } case FOURCC('t', 'x', '3', 'g'): { if (!mLastTrack) return ERROR_MALFORMED; uint32_t type; const void *data; size_t size = 0; if (!mLastTrack->meta->findData( kKeyTextFormatData, &type, &data, &size)) { size = 0; } if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) { return ERROR_MALFORMED; } uint8_t *buffer = new uint8_t[size + chunk_size]; if (buffer == NULL) { return ERROR_MALFORMED; } if (size > 0) { memcpy(buffer, data, size); } if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) < chunk_size) { delete[] buffer; buffer = NULL; return ERROR_IO; } mLastTrack->meta->setData( kKeyTextFormatData, 0, buffer, size + chunk_size); delete[] buffer; *offset += chunk_size; break; } case FOURCC('c', 'o', 'v', 'r'): { if (mFileMetaData != NULL) { ALOGV("chunk_data_size = %lld and data_offset = %lld", (long long)chunk_data_size, (long long)data_offset); if (chunk_data_size >= SIZE_MAX - 1) { return ERROR_MALFORMED; } uint8_t *buffer = new uint8_t[chunk_data_size + 1]; if (mDataSource->readAt( data_offset, buffer, chunk_data_size) != (ssize_t)chunk_data_size) { delete[] buffer; buffer = NULL; return ERROR_IO; } const int kSkipBytesOfDataBox = 16; if (chunk_data_size <= kSkipBytesOfDataBox) { return ERROR_MALFORMED; } mFileMetaData->setData( kKeyAlbumArt, MetaData::TYPE_NONE, buffer + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); } *offset += chunk_size; break; } case FOURCC('-', '-', '-', '-'): { mLastCommentMean.clear(); mLastCommentName.clear(); mLastCommentData.clear(); *offset += chunk_size; break; } default: { *offset += chunk_size; break; } } return OK; } status_t MPEG4Extractor::parseTrackHeader( off64_t data_offset, off64_t data_size) { if (data_size < 4) { return ERROR_MALFORMED; } uint8_t version; if (mDataSource->readAt(data_offset, &version, 1) < 1) { return ERROR_IO; } size_t dynSize = (version == 1) ? 36 : 24; uint8_t buffer[36 + 60]; if (data_size != (off64_t)dynSize + 60) { return ERROR_MALFORMED; } if (mDataSource->readAt( data_offset, buffer, data_size) < (ssize_t)data_size) { return ERROR_IO; } uint64_t ctime, mtime, duration; int32_t id; if (version == 1) { ctime = U64_AT(&buffer[4]); mtime = U64_AT(&buffer[12]); id = U32_AT(&buffer[20]); duration = U64_AT(&buffer[28]); } else { CHECK_EQ((unsigned)version, 0u); ctime = U32_AT(&buffer[4]); mtime = U32_AT(&buffer[8]); id = U32_AT(&buffer[12]); duration = U32_AT(&buffer[20]); } if (!mLastTrack) return ERROR_MALFORMED; mLastTrack->meta->setInt32(kKeyTrackID, id); size_t matrixOffset = dynSize + 16; int32_t a00 = U32_AT(&buffer[matrixOffset]); int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); int32_t dx = U32_AT(&buffer[matrixOffset + 8]); int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); int32_t dy = U32_AT(&buffer[matrixOffset + 20]); #if 0 ALOGI("x' = %.2f * x + %.2f * y + %.2f", a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); ALOGI("y' = %.2f * x + %.2f * y + %.2f", a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); #endif uint32_t rotationDegrees; static const int32_t kFixedOne = 0x10000; if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { // Identity, no rotation rotationDegrees = 0; } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { rotationDegrees = 90; } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { rotationDegrees = 270; } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { rotationDegrees = 180; } else { ALOGW("We only support 0,90,180,270 degree rotation matrices"); rotationDegrees = 0; } if (rotationDegrees != 0) { mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); } // Handle presentation display size, which could be different // from the image size indicated by kKeyWidth and kKeyHeight. uint32_t width = U32_AT(&buffer[dynSize + 52]); uint32_t height = U32_AT(&buffer[dynSize + 56]); mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); return OK; } status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) { if (size < 4) { return ERROR_MALFORMED; } uint8_t *buffer = new uint8_t[size + 1]; if (mDataSource->readAt( offset, buffer, size) != (ssize_t)size) { delete[] buffer; buffer = NULL; return ERROR_IO; } uint32_t flags = U32_AT(buffer); uint32_t metadataKey = 0; char chunk[5]; MakeFourCCString(mPath[4], chunk); ALOGV("meta: %s @ %lld", chunk, offset); switch (mPath[4]) { case FOURCC(0xa9, 'a', 'l', 'b'): { metadataKey = kKeyAlbum; break; } case FOURCC(0xa9, 'A', 'R', 'T'): { metadataKey = kKeyArtist; break; } case FOURCC('a', 'A', 'R', 'T'): { metadataKey = kKeyAlbumArtist; break; } case FOURCC(0xa9, 'd', 'a', 'y'): { metadataKey = kKeyYear; break; } case FOURCC(0xa9, 'n', 'a', 'm'): { metadataKey = kKeyTitle; break; } case FOURCC(0xa9, 'w', 'r', 't'): { metadataKey = kKeyWriter; break; } case FOURCC('c', 'o', 'v', 'r'): { metadataKey = kKeyAlbumArt; break; } case FOURCC('g', 'n', 'r', 'e'): { metadataKey = kKeyGenre; break; } case FOURCC(0xa9, 'g', 'e', 'n'): { metadataKey = kKeyGenre; break; } case FOURCC('c', 'p', 'i', 'l'): { if (size == 9 && flags == 21) { char tmp[16]; sprintf(tmp, "%d", (int)buffer[size - 1]); mFileMetaData->setCString(kKeyCompilation, tmp); } break; } case FOURCC('t', 'r', 'k', 'n'): { if (size == 16 && flags == 0) { char tmp[16]; sprintf(tmp, "%d/%d", (int)buffer[size - 5], (int)buffer[size - 3]); mFileMetaData->setCString(kKeyCDTrackNumber, tmp); } break; } case FOURCC('d', 'i', 's', 'k'): { if (size == 14 && flags == 0) { char tmp[16]; sprintf(tmp, "%d/%d", (int)buffer[size - 3], (int)buffer[size - 1]); mFileMetaData->setCString(kKeyDiscNumber, tmp); } break; } case FOURCC('-', '-', '-', '-'): { buffer[size] = '\0'; switch (mPath[5]) { case FOURCC('m', 'e', 'a', 'n'): mLastCommentMean.setTo((const char *)buffer + 4); break; case FOURCC('n', 'a', 'm', 'e'): mLastCommentName.setTo((const char *)buffer + 4); break; case FOURCC('d', 'a', 't', 'a'): mLastCommentData.setTo((const char *)buffer + 8); break; } // Once we have a set of mean/name/data info, go ahead and process // it to see if its something we are interested in. Whether or not // were are interested in the specific tag, make sure to clear out // the set so we can be ready to process another tuple should one // show up later in the file. if ((mLastCommentMean.length() != 0) && (mLastCommentName.length() != 0) && (mLastCommentData.length() != 0)) { if (mLastCommentMean == "com.apple.iTunes" && mLastCommentName == "iTunSMPB") { int32_t delay, padding; if (sscanf(mLastCommentData, " %*x %x %x %*x", &delay, &padding) == 2) { if (!mLastTrack) return ERROR_MALFORMED; mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); } } mLastCommentMean.clear(); mLastCommentName.clear(); mLastCommentData.clear(); } break; } default: break; } if (size >= 8 && metadataKey) { if (metadataKey == kKeyAlbumArt) { mFileMetaData->setData( kKeyAlbumArt, MetaData::TYPE_NONE, buffer + 8, size - 8); } else if (metadataKey == kKeyGenre) { if (flags == 0) { // uint8_t genre code, iTunes genre codes are // the standard id3 codes, except they start // at 1 instead of 0 (e.g. Pop is 14, not 13) // We use standard id3 numbering, so subtract 1. int genrecode = (int)buffer[size - 1]; genrecode--; if (genrecode < 0) { genrecode = 255; // reserved for 'unknown genre' } char genre[10]; sprintf(genre, "%d", genrecode); mFileMetaData->setCString(metadataKey, genre); } else if (flags == 1) { // custom genre string buffer[size] = '\0'; mFileMetaData->setCString( metadataKey, (const char *)buffer + 8); } } else { buffer[size] = '\0'; mFileMetaData->setCString( metadataKey, (const char *)buffer + 8); } } delete[] buffer; buffer = NULL; return OK; } sp MPEG4Extractor::getTrack(size_t index) { status_t err; if ((err = readMetaData()) != OK) { return NULL; } Track *track = mFirstTrack; while (index > 0) { if (track == NULL) { return NULL; } track = track->next; --index; } if (track == NULL) { return NULL; } return new MPEG4Source( track->meta, mDataSource, track->timescale, track->sampleTable); } // static status_t MPEG4Extractor::verifyTrack(Track *track) { const char *mime; CHECK(track->meta->findCString(kKeyMIMEType, &mime)); uint32_t type; const void *data; size_t size; if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { if (!track->meta->findData(kKeyAVCC, &type, &data, &size) || type != kTypeAVCC) { return ERROR_MALFORMED; } } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { if (!track->meta->findData(kKeyESDS, &type, &data, &size) || type != kTypeESDS) { return ERROR_MALFORMED; } } if (!track->sampleTable->isValid()) { // Make sure we have all the metadata we need. return ERROR_MALFORMED; } return OK; } status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( const void *esds_data, size_t esds_size) { ESDS esds(esds_data, esds_size); uint8_t objectTypeIndication; if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { return ERROR_MALFORMED; } #ifdef QCOM_HARDWARE if (objectTypeIndication == 0xA0) { // This isn't MPEG4 audio at all, it's EVRC mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_EVRC); return OK; } #endif if (objectTypeIndication == 0xe1) { // This isn't MPEG4 audio at all, it's QCELP 14k... if (!mLastTrack) return ERROR_MALFORMED; mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); return OK; } if (objectTypeIndication == 0x6b || objectTypeIndication == 0x69) { // This is mpeg1/2 audio content, set mimetype to mpeg mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG); ALOGD("objectTypeIndication:0x%x, set mimetype to mpeg ",objectTypeIndication); return OK; } const uint8_t *csd; size_t csd_size; if (esds.getCodecSpecificInfo( (const void **)&csd, &csd_size) != OK) { return ERROR_MALFORMED; } #if 0 printf("ESD of size %d\n", csd_size); hexdump(csd, csd_size); #endif if (csd_size == 0) { // There's no further information, i.e. no codec specific data // Let's assume that the information provided in the mpeg4 headers // is accurate and hope for the best. return OK; } if (csd_size < 2) { return ERROR_MALFORMED; } ABitReader br(csd, csd_size); uint32_t objectType = br.getBits(5); if (objectType == 31) { // AAC-ELD => additional 6 bits objectType = 32 + br.getBits(6); } if (!mLastTrack) return ERROR_MALFORMED; uint32_t freqIndex = br.getBits(4); int32_t sampleRate = 0; int32_t numChannels = 0; if (freqIndex == 15) { if (csd_size < 5) { return ERROR_MALFORMED; } sampleRate = br.getBits(24); numChannels = br.getBits(4); } else { static uint32_t kSamplingRate[] = { 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350 }; if (freqIndex == 13 || freqIndex == 14) { return ERROR_MALFORMED; } sampleRate = kSamplingRate[freqIndex]; numChannels = br.getBits(4); } if (numChannels == 0) { return ERROR_UNSUPPORTED; } if (!mLastTrack) return ERROR_MALFORMED; int32_t prevSampleRate; CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); if (prevSampleRate != sampleRate) { ALOGV("mpeg4 audio sample rate different from previous setting. " "was: %d, now: %d", prevSampleRate, sampleRate); } mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); int32_t prevChannelCount; CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); if (prevChannelCount != numChannels) { ALOGV("mpeg4 audio channel count different from previous setting. " "was: %d, now: %d", prevChannelCount, numChannels); } mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); return OK; } //////////////////////////////////////////////////////////////////////////////// MPEG4Source::MPEG4Source( const sp &format, const sp &dataSource, int32_t timeScale, const sp &sampleTable) : mFormat(format), mDataSource(dataSource), mTimescale(timeScale), mSampleTable(sampleTable), mCurrentSampleIndex(0), mIsAVC(false), mNALLengthSize(0), mStarted(false), mGroup(NULL), mBuffer(NULL), mWantsNALFragments(false), mSrcBuffer(NULL) { const char *mime; bool success = mFormat->findCString(kKeyMIMEType, &mime); CHECK(success); mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); if (mIsAVC) { uint32_t type; const void *data; size_t size; CHECK(format->findData(kKeyAVCC, &type, &data, &size)); const uint8_t *ptr = (const uint8_t *)data; CHECK(size >= 7); CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 // The number of bytes used to encode the length of a NAL unit. mNALLengthSize = 1 + (ptr[4] & 3); } #ifdef QCOM_HARDWARE //MPEG4 extractor can give complete frames, //set arbitrary mode to false mFormat->setInt32(kKeyUseArbitraryMode, 0); #endif } MPEG4Source::~MPEG4Source() { if (mStarted) { stop(); } } status_t MPEG4Source::start(MetaData *params) { Mutex::Autolock autoLock(mLock); CHECK(!mStarted); int32_t val; if (params && params->findInt32(kKeyWantsNALFragments, &val) && val != 0) { mWantsNALFragments = true; } else { mWantsNALFragments = false; } mGroup = new MediaBufferGroup; int32_t max_size; CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); mGroup->add_buffer(new MediaBuffer(max_size)); mSrcBuffer = new uint8_t[max_size]; mStarted = true; return OK; } status_t MPEG4Source::stop() { Mutex::Autolock autoLock(mLock); CHECK(mStarted); if (mBuffer != NULL) { mBuffer->release(); mBuffer = NULL; } delete[] mSrcBuffer; mSrcBuffer = NULL; delete mGroup; mGroup = NULL; mStarted = false; mCurrentSampleIndex = 0; return OK; } sp MPEG4Source::getFormat() { Mutex::Autolock autoLock(mLock); return mFormat; } size_t MPEG4Source::parseNALSize(const uint8_t *data) const { switch (mNALLengthSize) { case 1: return *data; case 2: return U16_AT(data); case 3: return ((size_t)data[0] << 16) | U16_AT(&data[1]); case 4: return U32_AT(data); } // This cannot happen, mNALLengthSize springs to life by adding 1 to // a 2-bit integer. CHECK(!"Should not be here."); return 0; } status_t MPEG4Source::read( MediaBuffer **out, const ReadOptions *options) { Mutex::Autolock autoLock(mLock); CHECK(mStarted); *out = NULL; int64_t targetSampleTimeUs = -1; int64_t seekTimeUs; ReadOptions::SeekMode mode; if (options && options->getSeekTo(&seekTimeUs, &mode)) { uint32_t findFlags = 0; switch (mode) { case ReadOptions::SEEK_PREVIOUS_SYNC: findFlags = SampleTable::kFlagBefore; break; case ReadOptions::SEEK_NEXT_SYNC: findFlags = SampleTable::kFlagAfter; break; case ReadOptions::SEEK_CLOSEST_SYNC: case ReadOptions::SEEK_CLOSEST: findFlags = SampleTable::kFlagClosest; break; default: CHECK(!"Should not be here."); break; } uint32_t sampleIndex; status_t err = mSampleTable->findSampleAtTime( seekTimeUs * mTimescale / 1000000, &sampleIndex, findFlags); if (mode == ReadOptions::SEEK_CLOSEST) { // We found the closest sample already, now we want the sync // sample preceding it (or the sample itself of course), even // if the subsequent sync sample is closer. findFlags = SampleTable::kFlagBefore; } uint32_t syncSampleIndex; if (err == OK) { err = mSampleTable->findSyncSampleNear( sampleIndex, &syncSampleIndex, findFlags); } uint32_t sampleTime; if (err == OK) { err = mSampleTable->getMetaDataForSample( sampleIndex, NULL, NULL, &sampleTime); } if (err != OK) { if (err == ERROR_OUT_OF_RANGE) { // An attempt to seek past the end of the stream would // normally cause this ERROR_OUT_OF_RANGE error. Propagating // this all the way to the MediaPlayer would cause abnormal // termination. Legacy behaviour appears to be to behave as if // we had seeked to the end of stream, ending normally. err = ERROR_END_OF_STREAM; } return err; } if (mode == ReadOptions::SEEK_CLOSEST) { targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; } #if 0 uint32_t syncSampleTime; CHECK_EQ(OK, mSampleTable->getMetaDataForSample( syncSampleIndex, NULL, NULL, &syncSampleTime)); ALOGI("seek to time %lld us => sample at time %lld us, " "sync sample at time %lld us", seekTimeUs, sampleTime * 1000000ll / mTimescale, syncSampleTime * 1000000ll / mTimescale); #endif mCurrentSampleIndex = syncSampleIndex; if (mBuffer != NULL) { mBuffer->release(); mBuffer = NULL; } // fall through } off64_t offset; size_t size; uint32_t cts; bool isSyncSample; bool newBuffer = false; if (mBuffer == NULL) { newBuffer = true; status_t err = mSampleTable->getMetaDataForSample( mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample); if (err != OK) { return err; } err = mGroup->acquire_buffer(&mBuffer); if (err != OK) { CHECK(mBuffer == NULL); return err; } } if (!mIsAVC || mWantsNALFragments) { if (newBuffer) { ssize_t num_bytes_read = mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); if (num_bytes_read < (ssize_t)size) { mBuffer->release(); mBuffer = NULL; return ERROR_IO; } CHECK(mBuffer != NULL); mBuffer->set_range(0, size); mBuffer->meta_data()->clear(); mBuffer->meta_data()->setInt64( kKeyTime, ((int64_t)cts * 1000000) / mTimescale); if (targetSampleTimeUs >= 0) { mBuffer->meta_data()->setInt64( kKeyTargetTime, targetSampleTimeUs); } if (isSyncSample) { mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); } ++mCurrentSampleIndex; } if (!mIsAVC) { *out = mBuffer; mBuffer = NULL; return OK; } // Each NAL unit is split up into its constituent fragments and // each one of them returned in its own buffer. CHECK(mBuffer->range_length() >= mNALLengthSize); const uint8_t *src = (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); size_t nal_size = parseNALSize(src); if (mBuffer->range_length() < mNALLengthSize + nal_size) { ALOGE("incomplete NAL unit."); mBuffer->release(); mBuffer = NULL; return ERROR_MALFORMED; } MediaBuffer *clone = mBuffer->clone(); CHECK(clone != NULL); clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); CHECK(mBuffer != NULL); mBuffer->set_range( mBuffer->range_offset() + mNALLengthSize + nal_size, mBuffer->range_length() - mNALLengthSize - nal_size); if (mBuffer->range_length() == 0) { mBuffer->release(); mBuffer = NULL; } *out = clone; return OK; } else { // Whole NAL units are returned but each fragment is prefixed by // the start code (0x00 00 00 01). ssize_t num_bytes_read = 0; int32_t drm = 0; bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); if (usesDRM) { num_bytes_read = mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); } else { num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); } if (num_bytes_read < (ssize_t)size) { mBuffer->release(); mBuffer = NULL; return ERROR_IO; } if (usesDRM) { CHECK(mBuffer != NULL); mBuffer->set_range(0, size); } else { uint8_t *dstData = (uint8_t *)mBuffer->data(); size_t srcOffset = 0; size_t dstOffset = 0; while (srcOffset < size) { bool isMalFormed = (srcOffset + mNALLengthSize > size); size_t nalLength = 0; if (!isMalFormed) { nalLength = parseNALSize(&mSrcBuffer[srcOffset]); srcOffset += mNALLengthSize; isMalFormed = srcOffset + nalLength > size; } if (isMalFormed) { ALOGE("Video is malformed"); mBuffer->release(); mBuffer = NULL; return ERROR_MALFORMED; } if (nalLength == 0) { continue; } CHECK(dstOffset + 4 <= mBuffer->size()); dstData[dstOffset++] = 0; dstData[dstOffset++] = 0; dstData[dstOffset++] = 0; dstData[dstOffset++] = 1; memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); srcOffset += nalLength; dstOffset += nalLength; } CHECK_EQ(srcOffset, size); CHECK(mBuffer != NULL); mBuffer->set_range(0, dstOffset); } mBuffer->meta_data()->clear(); mBuffer->meta_data()->setInt64( kKeyTime, ((int64_t)cts * 1000000) / mTimescale); if (targetSampleTimeUs >= 0) { mBuffer->meta_data()->setInt64( kKeyTargetTime, targetSampleTimeUs); } if (isSyncSample) { mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); } ++mCurrentSampleIndex; *out = mBuffer; mBuffer = NULL; return OK; } } MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( const char *mimePrefix) { for (Track *track = mFirstTrack; track != NULL; track = track->next) { const char *mime; if (track->meta != NULL && track->meta->findCString(kKeyMIMEType, &mime) && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { return track; } } return NULL; } static bool LegacySniffMPEG4( const sp &source, String8 *mimeType, float *confidence) { uint8_t header[8]; ssize_t n = source->readAt(4, header, sizeof(header)); if (n < (ssize_t)sizeof(header)) { return false; } if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) #ifdef QCOM_HARDWARE || !memcmp(header, "ftyp3g2a", 8) || !memcmp(header, "ftyp3g2b", 8) || !memcmp(header, "ftyp3g2c", 8) #endif || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; *confidence = 0.4; return true; } return false; } static bool isCompatibleBrand(uint32_t fourcc) { static const uint32_t kCompatibleBrands[] = { FOURCC('i', 's', 'o', 'm'), FOURCC('i', 's', 'o', '2'), FOURCC('a', 'v', 'c', '1'), FOURCC('3', 'g', 'p', '4'), FOURCC('m', 'p', '4', '1'), FOURCC('m', 'p', '4', '2'), // Won't promise that the following file types can be played. // Just give these file types a chance. FOURCC('q', 't', ' ', ' '), // Apple's QuickTime FOURCC('M', 'S', 'N', 'V'), // Sony's PSP FOURCC('3', 'g', '2', 'a'), // 3GPP2 FOURCC('3', 'g', '2', 'b'), }; for (size_t i = 0; i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); ++i) { if (kCompatibleBrands[i] == fourcc) { return true; } } return false; } // Attempt to actually parse the 'ftyp' atom and determine if a suitable // compatible brand is present. // Also try to identify where this file's metadata ends // (end of the 'moov' atom) and report it to the caller as part of // the metadata. static bool BetterSniffMPEG4( const sp &source, String8 *mimeType, float *confidence, sp *meta) { // We scan up to 128 bytes to identify this file as an MP4. static const off64_t kMaxScanOffset = 128ll; off64_t offset = 0ll; bool foundGoodFileType = false; off64_t moovAtomEndOffset = -1ll; bool done = false; while (!done && offset < kMaxScanOffset) { uint32_t hdr[2]; if (source->readAt(offset, hdr, 8) < 8) { return false; } uint64_t chunkSize = ntohl(hdr[0]); uint32_t chunkType = ntohl(hdr[1]); off64_t chunkDataOffset = offset + 8; if (chunkSize == 1) { if (source->readAt(offset + 8, &chunkSize, 8) < 8) { return false; } chunkSize = ntoh64(chunkSize); chunkDataOffset += 8; if (chunkSize < 16) { // The smallest valid chunk is 16 bytes long in this case. return false; } } else if (chunkSize < 8) { // The smallest valid chunk is 8 bytes long. return false; } off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; switch (chunkType) { case FOURCC('f', 't', 'y', 'p'): { if (chunkDataSize < 8) { return false; } uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { if (i == 1) { // Skip this index, it refers to the minorVersion, // not a brand. continue; } uint32_t brand; if (source->readAt( chunkDataOffset + 4 * i, &brand, 4) < 4) { return false; } brand = ntohl(brand); if (isCompatibleBrand(brand)) { foundGoodFileType = true; break; } } if (!foundGoodFileType) { return false; } break; } case FOURCC('m', 'o', 'o', 'v'): { moovAtomEndOffset = offset + chunkSize; done = true; break; } default: break; } offset += chunkSize; } if (!foundGoodFileType) { return false; } *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; *confidence = 0.4f; if (moovAtomEndOffset >= 0) { *meta = new AMessage; (*meta)->setInt64("meta-data-size", moovAtomEndOffset); ALOGV("found metadata size: %lld", moovAtomEndOffset); } return true; } bool SniffMPEG4( const sp &source, String8 *mimeType, float *confidence, sp *meta) { if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { return true; } if (LegacySniffMPEG4(source, mimeType, confidence)) { ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); return true; } return false; } } // namespace android