diff options
Diffstat (limited to 'media/libstagefright/MPEG4Extractor.cpp')
-rw-r--r-- | media/libstagefright/MPEG4Extractor.cpp | 2477 |
1 files changed, 2477 insertions, 0 deletions
diff --git a/media/libstagefright/MPEG4Extractor.cpp b/media/libstagefright/MPEG4Extractor.cpp new file mode 100644 index 0000000..9385b8a --- /dev/null +++ b/media/libstagefright/MPEG4Extractor.cpp @@ -0,0 +1,2477 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define LOG_TAG "MPEG4Extractor" +#include <utils/Log.h> + +#include "include/MPEG4Extractor.h" +#include "include/SampleTable.h" +#include "include/ESDS.h" + +#include <arpa/inet.h> + +#include <ctype.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include <media/stagefright/foundation/ADebug.h> +#include <media/stagefright/foundation/AMessage.h> +#include <media/stagefright/DataSource.h> +#include <media/stagefright/MediaBuffer.h> +#include <media/stagefright/MediaBufferGroup.h> +#include <media/stagefright/MediaDefs.h> +#include <media/stagefright/MediaSource.h> +#include <media/stagefright/MetaData.h> +#include <media/stagefright/Utils.h> +#include <utils/String8.h> + +namespace android { + +class MPEG4Source : public MediaSource { +public: + // Caller retains ownership of both "dataSource" and "sampleTable". + MPEG4Source(const sp<MetaData> &format, + const sp<DataSource> &dataSource, + int32_t timeScale, + const sp<SampleTable> &sampleTable); + + virtual status_t start(MetaData *params = NULL); + virtual status_t stop(); + + virtual sp<MetaData> getFormat(); + + virtual status_t read( + MediaBuffer **buffer, const ReadOptions *options = NULL); + +protected: + virtual ~MPEG4Source(); + +private: + Mutex mLock; + + sp<MetaData> mFormat; + sp<DataSource> mDataSource; + int32_t mTimescale; + sp<SampleTable> mSampleTable; + uint32_t mCurrentSampleIndex; + + bool mIsAVC; + size_t mNALLengthSize; + + bool mStarted; + + MediaBufferGroup *mGroup; + + MediaBuffer *mBuffer; + + bool mWantsNALFragments; + + uint8_t *mSrcBuffer; + + size_t parseNALSize(const uint8_t *data) const; + + MPEG4Source(const MPEG4Source &); + MPEG4Source &operator=(const MPEG4Source &); +}; + +// This custom data source wraps an existing one and satisfies requests +// falling entirely within a cached range from the cache while forwarding +// all remaining requests to the wrapped datasource. +// This is used to cache the full sampletable metadata for a single track, +// possibly wrapping multiple times to cover all tracks, i.e. +// Each MPEG4DataSource caches the sampletable metadata for a single track. + +struct MPEG4DataSource : public DataSource { + MPEG4DataSource(const sp<DataSource> &source); + + virtual status_t initCheck() const; + virtual ssize_t readAt(off64_t offset, void *data, size_t size); + virtual status_t getSize(off64_t *size); + virtual uint32_t flags(); + + status_t setCachedRange(off64_t offset, size_t size); + +protected: + virtual ~MPEG4DataSource(); + +private: + Mutex mLock; + + sp<DataSource> mSource; + off64_t mCachedOffset; + size_t mCachedSize; + uint8_t *mCache; + + void clearCache(); + + MPEG4DataSource(const MPEG4DataSource &); + MPEG4DataSource &operator=(const MPEG4DataSource &); +}; + +MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) + : mSource(source), + mCachedOffset(0), + mCachedSize(0), + mCache(NULL) { +} + +MPEG4DataSource::~MPEG4DataSource() { + clearCache(); +} + +void MPEG4DataSource::clearCache() { + if (mCache) { + free(mCache); + mCache = NULL; + } + + mCachedOffset = 0; + mCachedSize = 0; +} + +status_t MPEG4DataSource::initCheck() const { + return mSource->initCheck(); +} + +ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { + Mutex::Autolock autoLock(mLock); + + if (offset >= mCachedOffset + && offset + size <= mCachedOffset + mCachedSize) { + memcpy(data, &mCache[offset - mCachedOffset], size); + return size; + } + + return mSource->readAt(offset, data, size); +} + +status_t MPEG4DataSource::getSize(off64_t *size) { + return mSource->getSize(size); +} + +uint32_t MPEG4DataSource::flags() { + return mSource->flags(); +} + +status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { + Mutex::Autolock autoLock(mLock); + + clearCache(); + + mCache = (uint8_t *)malloc(size); + + if (mCache == NULL) { + return -ENOMEM; + } + + mCachedOffset = offset; + mCachedSize = size; + + ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); + + if (err < (ssize_t)size) { + clearCache(); + + return ERROR_IO; + } + + return OK; +} + +//////////////////////////////////////////////////////////////////////////////// + +static void hexdump(const void *_data, size_t size) { + const uint8_t *data = (const uint8_t *)_data; + size_t offset = 0; + while (offset < size) { + printf("0x%04x ", offset); + + size_t n = size - offset; + if (n > 16) { + n = 16; + } + + for (size_t i = 0; i < 16; ++i) { + if (i == 8) { + printf(" "); + } + + if (offset + i < size) { + printf("%02x ", data[offset + i]); + } else { + printf(" "); + } + } + + printf(" "); + + for (size_t i = 0; i < n; ++i) { + if (isprint(data[offset + i])) { + printf("%c", data[offset + i]); + } else { + printf("."); + } + } + + printf("\n"); + + offset += 16; + } +} + +static const char *FourCC2MIME(uint32_t fourcc) { + switch (fourcc) { + case FOURCC('m', 'p', '4', 'a'): + return MEDIA_MIMETYPE_AUDIO_AAC; + + case FOURCC('s', 'a', 'm', 'r'): + return MEDIA_MIMETYPE_AUDIO_AMR_NB; + + case FOURCC('s', 'a', 'w', 'b'): + return MEDIA_MIMETYPE_AUDIO_AMR_WB; + + case FOURCC('m', 'p', '4', 'v'): + return MEDIA_MIMETYPE_VIDEO_MPEG4; + + case FOURCC('s', '2', '6', '3'): + case FOURCC('h', '2', '6', '3'): + case FOURCC('H', '2', '6', '3'): + return MEDIA_MIMETYPE_VIDEO_H263; + + case FOURCC('a', 'v', 'c', '1'): + return MEDIA_MIMETYPE_VIDEO_AVC; + + default: + CHECK(!"should not be here."); + return NULL; + } +} + +MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) + : mDataSource(source), + mInitCheck(NO_INIT), + mHasVideo(false), + mFirstTrack(NULL), + mLastTrack(NULL), + mFileMetaData(new MetaData), + mFirstSINF(NULL), + mIsDrm(false) { +} + +MPEG4Extractor::~MPEG4Extractor() { + Track *track = mFirstTrack; + while (track) { + Track *next = track->next; + + delete track; + track = next; + } + mFirstTrack = mLastTrack = NULL; + + SINF *sinf = mFirstSINF; + while (sinf) { + SINF *next = sinf->next; + delete sinf->IPMPData; + delete sinf; + sinf = next; + } + mFirstSINF = NULL; +} + +sp<MetaData> MPEG4Extractor::getMetaData() { + status_t err; + if ((err = readMetaData()) != OK) { + return new MetaData; + } + + return mFileMetaData; +} + +size_t MPEG4Extractor::countTracks() { + status_t err; + if ((err = readMetaData()) != OK) { + return 0; + } + + size_t n = 0; + Track *track = mFirstTrack; + while (track) { + ++n; + track = track->next; + } + + return n; +} + +sp<MetaData> MPEG4Extractor::getTrackMetaData( + size_t index, uint32_t flags) { + status_t err; + if ((err = readMetaData()) != OK) { + return NULL; + } + + Track *track = mFirstTrack; + while (index > 0) { + if (track == NULL) { + return NULL; + } + + track = track->next; + --index; + } + + if (track == NULL) { + return NULL; + } + + if ((flags & kIncludeExtensiveMetaData) + && !track->includes_expensive_metadata) { + track->includes_expensive_metadata = true; + + const char *mime; + CHECK(track->meta->findCString(kKeyMIMEType, &mime)); + if (!strncasecmp("video/", mime, 6)) { + uint32_t sampleIndex; + uint32_t sampleTime; + if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK + && track->sampleTable->getMetaDataForSample( + sampleIndex, NULL /* offset */, NULL /* size */, + &sampleTime) == OK) { + track->meta->setInt64( + kKeyThumbnailTime, + ((int64_t)sampleTime * 1000000) / track->timescale); + } + } + } + + return track->meta; +} + +status_t MPEG4Extractor::readMetaData() { + if (mInitCheck != NO_INIT) { + return mInitCheck; + } + + off64_t offset = 0; + status_t err; + while ((err = parseChunk(&offset, 0)) == OK) { + } + + if (mInitCheck == OK) { + if (mHasVideo) { + mFileMetaData->setCString( + kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); + } else { + mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); + } + + mInitCheck = OK; + } else { + mInitCheck = err; + } + + CHECK_NE(err, (status_t)NO_INIT); + return mInitCheck; +} + +char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { + if (mFirstSINF == NULL) { + return NULL; + } + + SINF *sinf = mFirstSINF; + while (sinf && (trackID != sinf->trackID)) { + sinf = sinf->next; + } + + if (sinf == NULL) { + return NULL; + } + + *len = sinf->len; + return sinf->IPMPData; +} + +// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. +int32_t readSize(off64_t offset, + const sp<DataSource> DataSource, uint8_t *numOfBytes) { + uint32_t size = 0; + uint8_t data; + bool moreData = true; + *numOfBytes = 0; + + while (moreData) { + if (DataSource->readAt(offset, &data, 1) < 1) { + return -1; + } + offset ++; + moreData = (data >= 128) ? true : false; + size = (size << 7) | (data & 0x7f); // Take last 7 bits + (*numOfBytes) ++; + } + + return size; +} + +status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) { + uint8_t updateIdTag; + if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { + return ERROR_IO; + } + data_offset ++; + + if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { + return ERROR_MALFORMED; + } + + uint8_t numOfBytes; + int32_t size = readSize(data_offset, mDataSource, &numOfBytes); + if (size < 0) { + return ERROR_IO; + } + int32_t classSize = size; + data_offset += numOfBytes; + + while(size >= 11 ) { + uint8_t descriptorTag; + if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { + return ERROR_IO; + } + data_offset ++; + + if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { + return ERROR_MALFORMED; + } + + uint8_t buffer[8]; + //ObjectDescriptorID and ObjectDescriptor url flag + if (mDataSource->readAt(data_offset, buffer, 2) < 2) { + return ERROR_IO; + } + data_offset += 2; + + if ((buffer[1] >> 5) & 0x0001) { //url flag is set + return ERROR_MALFORMED; + } + + if (mDataSource->readAt(data_offset, buffer, 8) < 8) { + return ERROR_IO; + } + data_offset += 8; + + if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) + || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { + return ERROR_MALFORMED; + } + + SINF *sinf = new SINF; + sinf->trackID = U16_AT(&buffer[3]); + sinf->IPMPDescriptorID = buffer[7]; + sinf->next = mFirstSINF; + mFirstSINF = sinf; + + size -= (8 + 2 + 1); + } + + if (size != 0) { + return ERROR_MALFORMED; + } + + if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { + return ERROR_IO; + } + data_offset ++; + + if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { + return ERROR_MALFORMED; + } + + size = readSize(data_offset, mDataSource, &numOfBytes); + if (size < 0) { + return ERROR_IO; + } + classSize = size; + data_offset += numOfBytes; + + while (size > 0) { + uint8_t tag; + int32_t dataLen; + if (mDataSource->readAt(data_offset, &tag, 1) < 1) { + return ERROR_IO; + } + data_offset ++; + + if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { + uint8_t id; + dataLen = readSize(data_offset, mDataSource, &numOfBytes); + if (dataLen < 0) { + return ERROR_IO; + } else if (dataLen < 4) { + return ERROR_MALFORMED; + } + data_offset += numOfBytes; + + if (mDataSource->readAt(data_offset, &id, 1) < 1) { + return ERROR_IO; + } + data_offset ++; + + SINF *sinf = mFirstSINF; + while (sinf && (sinf->IPMPDescriptorID != id)) { + sinf = sinf->next; + } + if (sinf == NULL) { + return ERROR_MALFORMED; + } + sinf->len = dataLen - 3; + sinf->IPMPData = new char[sinf->len]; + + if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) { + return ERROR_IO; + } + data_offset += sinf->len; + + size -= (dataLen + numOfBytes + 1); + } + } + + if (size != 0) { + return ERROR_MALFORMED; + } + + return UNKNOWN_ERROR; // Return a dummy error. +} + +static void MakeFourCCString(uint32_t x, char *s) { + s[0] = x >> 24; + s[1] = (x >> 16) & 0xff; + s[2] = (x >> 8) & 0xff; + s[3] = x & 0xff; + s[4] = '\0'; +} + +struct PathAdder { + PathAdder(Vector<uint32_t> *path, uint32_t chunkType) + : mPath(path) { + mPath->push(chunkType); + } + + ~PathAdder() { + mPath->pop(); + } + +private: + Vector<uint32_t> *mPath; + + PathAdder(const PathAdder &); + PathAdder &operator=(const PathAdder &); +}; + +static bool underMetaDataPath(const Vector<uint32_t> &path) { + return path.size() >= 5 + && path[0] == FOURCC('m', 'o', 'o', 'v') + && path[1] == FOURCC('u', 'd', 't', 'a') + && path[2] == FOURCC('m', 'e', 't', 'a') + && path[3] == FOURCC('i', 'l', 's', 't'); +} + +// Given a time in seconds since Jan 1 1904, produce a human-readable string. +static void convertTimeToDate(int64_t time_1904, String8 *s) { + time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); + + char tmp[32]; + strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); + + s->setTo(tmp); +} + +status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { + ALOGV("entering parseChunk %lld/%d", *offset, depth); + uint32_t hdr[2]; + if (mDataSource->readAt(*offset, hdr, 8) < 8) { + return ERROR_IO; + } + uint64_t chunk_size = ntohl(hdr[0]); + uint32_t chunk_type = ntohl(hdr[1]); + off64_t data_offset = *offset + 8; + + if (chunk_size == 1) { + if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { + return ERROR_IO; + } + chunk_size = ntoh64(chunk_size); + data_offset += 8; + + if (chunk_size < 16) { + // The smallest valid chunk is 16 bytes long in this case. + return ERROR_MALFORMED; + } + } else if (chunk_size < 8) { + // The smallest valid chunk is 8 bytes long. + return ERROR_MALFORMED; + } + + char chunk[5]; + MakeFourCCString(chunk_type, chunk); + ALOGV("chunk: %s @ %lld", chunk, *offset); + +#if 0 + static const char kWhitespace[] = " "; + const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; + printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size); + + char buffer[256]; + size_t n = chunk_size; + if (n > sizeof(buffer)) { + n = sizeof(buffer); + } + if (mDataSource->readAt(*offset, buffer, n) + < (ssize_t)n) { + return ERROR_IO; + } + + hexdump(buffer, n); +#endif + + PathAdder autoAdder(&mPath, chunk_type); + + off64_t chunk_data_size = *offset + chunk_size - data_offset; + + if (chunk_type != FOURCC('c', 'p', 'r', 't') + && chunk_type != FOURCC('c', 'o', 'v', 'r') + && mPath.size() == 5 && underMetaDataPath(mPath)) { + off64_t stop_offset = *offset + chunk_size; + *offset = data_offset; + while (*offset < stop_offset) { + status_t err = parseChunk(offset, depth + 1); + if (err != OK) { + return err; + } + } + + if (*offset != stop_offset) { + return ERROR_MALFORMED; + } + + return OK; + } + + switch(chunk_type) { + case FOURCC('m', 'o', 'o', 'v'): + case FOURCC('t', 'r', 'a', 'k'): + case FOURCC('m', 'd', 'i', 'a'): + case FOURCC('m', 'i', 'n', 'f'): + case FOURCC('d', 'i', 'n', 'f'): + case FOURCC('s', 't', 'b', 'l'): + case FOURCC('m', 'v', 'e', 'x'): + case FOURCC('m', 'o', 'o', 'f'): + case FOURCC('t', 'r', 'a', 'f'): + case FOURCC('m', 'f', 'r', 'a'): + case FOURCC('u', 'd', 't', 'a'): + case FOURCC('i', 'l', 's', 't'): + { + if (chunk_type == FOURCC('s', 't', 'b', 'l')) { + ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size); + + if (mDataSource->flags() + & (DataSource::kWantsPrefetching + | DataSource::kIsCachingDataSource)) { + sp<MPEG4DataSource> cachedSource = + new MPEG4DataSource(mDataSource); + + if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { + mDataSource = cachedSource; + } + } + + mLastTrack->sampleTable = new SampleTable(mDataSource); + } + + bool isTrack = false; + if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { + isTrack = true; + + Track *track = new Track; + track->next = NULL; + if (mLastTrack) { + mLastTrack->next = track; + } else { + mFirstTrack = track; + } + mLastTrack = track; + + track->meta = new MetaData; + track->includes_expensive_metadata = false; + track->skipTrack = false; + track->timescale = 0; + track->meta->setCString(kKeyMIMEType, "application/octet-stream"); + } + + off64_t stop_offset = *offset + chunk_size; + *offset = data_offset; + while (*offset < stop_offset) { + status_t err = parseChunk(offset, depth + 1); + if (err != OK) { + return err; + } + } + + if (*offset != stop_offset) { + return ERROR_MALFORMED; + } + + if (isTrack) { + if (mLastTrack->skipTrack) { + Track *cur = mFirstTrack; + + if (cur == mLastTrack) { + delete cur; + mFirstTrack = mLastTrack = NULL; + } else { + while (cur && cur->next != mLastTrack) { + cur = cur->next; + } + cur->next = NULL; + delete mLastTrack; + mLastTrack = cur; + } + + return OK; + } + + status_t err = verifyTrack(mLastTrack); + + if (err != OK) { + return err; + } + } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { + mInitCheck = OK; + + if (!mIsDrm) { + return UNKNOWN_ERROR; // Return a dummy error. + } else { + return OK; + } + } + break; + } + + case FOURCC('t', 'k', 'h', 'd'): + { + status_t err; + if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { + return err; + } + + *offset += chunk_size; + break; + } + + case FOURCC('m', 'd', 'h', 'd'): + { + if (chunk_data_size < 4) { + return ERROR_MALFORMED; + } + + uint8_t version; + if (mDataSource->readAt( + data_offset, &version, sizeof(version)) + < (ssize_t)sizeof(version)) { + return ERROR_IO; + } + + off64_t timescale_offset; + + if (version == 1) { + timescale_offset = data_offset + 4 + 16; + } else if (version == 0) { + timescale_offset = data_offset + 4 + 8; + } else { + return ERROR_IO; + } + + uint32_t timescale; + if (mDataSource->readAt( + timescale_offset, ×cale, sizeof(timescale)) + < (ssize_t)sizeof(timescale)) { + return ERROR_IO; + } + + mLastTrack->timescale = ntohl(timescale); + + int64_t duration; + if (version == 1) { + if (mDataSource->readAt( + timescale_offset + 4, &duration, sizeof(duration)) + < (ssize_t)sizeof(duration)) { + return ERROR_IO; + } + duration = ntoh64(duration); + } else { + int32_t duration32; + if (mDataSource->readAt( + timescale_offset + 4, &duration32, sizeof(duration32)) + < (ssize_t)sizeof(duration32)) { + return ERROR_IO; + } + duration = ntohl(duration32); + } + mLastTrack->meta->setInt64( + kKeyDuration, (duration * 1000000) / mLastTrack->timescale); + + uint8_t lang[2]; + off64_t lang_offset; + if (version == 1) { + lang_offset = timescale_offset + 4 + 8; + } else if (version == 0) { + lang_offset = timescale_offset + 4 + 4; + } else { + return ERROR_IO; + } + + if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) + < (ssize_t)sizeof(lang)) { + return ERROR_IO; + } + + // To get the ISO-639-2/T three character language code + // 1 bit pad followed by 3 5-bits characters. Each character + // is packed as the difference between its ASCII value and 0x60. + char lang_code[4]; + lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; + lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; + lang_code[2] = (lang[1] & 0x1f) + 0x60; + lang_code[3] = '\0'; + + mLastTrack->meta->setCString( + kKeyMediaLanguage, lang_code); + + *offset += chunk_size; + break; + } + + case FOURCC('s', 't', 's', 'd'): + { + if (chunk_data_size < 8) { + return ERROR_MALFORMED; + } + + uint8_t buffer[8]; + if (chunk_data_size < (off64_t)sizeof(buffer)) { + return ERROR_MALFORMED; + } + + if (mDataSource->readAt( + data_offset, buffer, 8) < 8) { + return ERROR_IO; + } + + if (U32_AT(buffer) != 0) { + // Should be version 0, flags 0. + return ERROR_MALFORMED; + } + + uint32_t entry_count = U32_AT(&buffer[4]); + + if (entry_count > 1) { + // For 3GPP timed text, there could be multiple tx3g boxes contain + // multiple text display formats. These formats will be used to + // display the timed text. + const char *mime; + CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); + if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP)) { + // For now we only support a single type of media per track. + mLastTrack->skipTrack = true; + *offset += chunk_size; + break; + } + } + + off64_t stop_offset = *offset + chunk_size; + *offset = data_offset + 8; + for (uint32_t i = 0; i < entry_count; ++i) { + status_t err = parseChunk(offset, depth + 1); + if (err != OK) { + return err; + } + } + + if (*offset != stop_offset) { + return ERROR_MALFORMED; + } + break; + } + + case FOURCC('m', 'p', '4', 'a'): + case FOURCC('s', 'a', 'm', 'r'): + case FOURCC('s', 'a', 'w', 'b'): + { + uint8_t buffer[8 + 20]; + if (chunk_data_size < (ssize_t)sizeof(buffer)) { + // Basic AudioSampleEntry size. + return ERROR_MALFORMED; + } + + if (mDataSource->readAt( + data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { + return ERROR_IO; + } + + uint16_t data_ref_index = U16_AT(&buffer[6]); + uint16_t num_channels = U16_AT(&buffer[16]); + + uint16_t sample_size = U16_AT(&buffer[18]); + uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; + + if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, + FourCC2MIME(chunk_type))) { + // AMR NB audio is always mono, 8kHz + num_channels = 1; + sample_rate = 8000; + } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, + FourCC2MIME(chunk_type))) { + // AMR WB audio is always mono, 16kHz + num_channels = 1; + sample_rate = 16000; + } + +#if 0 + printf("*** coding='%s' %d channels, size %d, rate %d\n", + chunk, num_channels, sample_size, sample_rate); +#endif + + mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); + mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); + mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); + + off64_t stop_offset = *offset + chunk_size; + *offset = data_offset + sizeof(buffer); + while (*offset < stop_offset) { + status_t err = parseChunk(offset, depth + 1); + if (err != OK) { + return err; + } + } + + if (*offset != stop_offset) { + return ERROR_MALFORMED; + } + break; + } + + case FOURCC('m', 'p', '4', 'v'): + case FOURCC('s', '2', '6', '3'): + case FOURCC('H', '2', '6', '3'): + case FOURCC('h', '2', '6', '3'): + case FOURCC('a', 'v', 'c', '1'): + { + mHasVideo = true; + + uint8_t buffer[78]; + if (chunk_data_size < (ssize_t)sizeof(buffer)) { + // Basic VideoSampleEntry size. + return ERROR_MALFORMED; + } + + if (mDataSource->readAt( + data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { + return ERROR_IO; + } + + uint16_t data_ref_index = U16_AT(&buffer[6]); + uint16_t width = U16_AT(&buffer[6 + 18]); + uint16_t height = U16_AT(&buffer[6 + 20]); + + // The video sample is not stand-compliant if it has invalid dimension. + // Use some default width and height value, and + // let the decoder figure out the actual width and height (and thus + // be prepared for INFO_FOMRAT_CHANGED event). + if (width == 0) width = 352; + if (height == 0) height = 288; + + // printf("*** coding='%s' width=%d height=%d\n", + // chunk, width, height); + + mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); + mLastTrack->meta->setInt32(kKeyWidth, width); + mLastTrack->meta->setInt32(kKeyHeight, height); + + off64_t stop_offset = *offset + chunk_size; + *offset = data_offset + sizeof(buffer); + while (*offset < stop_offset) { + status_t err = parseChunk(offset, depth + 1); + if (err != OK) { + return err; + } + } + + if (*offset != stop_offset) { + return ERROR_MALFORMED; + } + break; + } + + case FOURCC('s', 't', 'c', 'o'): + case FOURCC('c', 'o', '6', '4'): + { + status_t err = + mLastTrack->sampleTable->setChunkOffsetParams( + chunk_type, data_offset, chunk_data_size); + + if (err != OK) { + return err; + } + + *offset += chunk_size; + break; + } + + case FOURCC('s', 't', 's', 'c'): + { + status_t err = + mLastTrack->sampleTable->setSampleToChunkParams( + data_offset, chunk_data_size); + + if (err != OK) { + return err; + } + + *offset += chunk_size; + break; + } + + case FOURCC('s', 't', 's', 'z'): + case FOURCC('s', 't', 'z', '2'): + { + status_t err = + mLastTrack->sampleTable->setSampleSizeParams( + chunk_type, data_offset, chunk_data_size); + + if (err != OK) { + return err; + } + + size_t max_size; + err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); + + if (err != OK) { + return err; + } + + // Assume that a given buffer only contains at most 10 fragments, + // each fragment originally prefixed with a 2 byte length will + // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, + // and thus will grow by 2 bytes per fragment. + mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); + *offset += chunk_size; + + // Calculate average frame rate. + const char *mime; + CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); + if (!strncasecmp("video/", mime, 6)) { + size_t nSamples = mLastTrack->sampleTable->countSamples(); + int64_t durationUs; + if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { + if (durationUs > 0) { + int32_t frameRate = (nSamples * 1000000LL + + (durationUs >> 1)) / durationUs; + mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); + } + } + } + + break; + } + + case FOURCC('s', 't', 't', 's'): + { + status_t err = + mLastTrack->sampleTable->setTimeToSampleParams( + data_offset, chunk_data_size); + + if (err != OK) { + return err; + } + + *offset += chunk_size; + break; + } + + case FOURCC('c', 't', 't', 's'): + { + status_t err = + mLastTrack->sampleTable->setCompositionTimeToSampleParams( + data_offset, chunk_data_size); + + if (err != OK) { + return err; + } + + *offset += chunk_size; + break; + } + + case FOURCC('s', 't', 's', 's'): + { + status_t err = + mLastTrack->sampleTable->setSyncSampleParams( + data_offset, chunk_data_size); + + if (err != OK) { + return err; + } + + *offset += chunk_size; + break; + } + + // @xyz + case FOURCC('\xA9', 'x', 'y', 'z'): + { + // Best case the total data length inside "@xyz" box + // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", + // where "\x00\x04" is the text string length with value = 4, + // "\0x15\xc7" is the language code = en, and "0+0" is a + // location (string) value with longitude = 0 and latitude = 0. + if (chunk_data_size < 8) { + return ERROR_MALFORMED; + } + + // Worst case the location string length would be 18, + // for instance +90.0000-180.0000, without the trailing "/" and + // the string length + language code. + char buffer[18]; + + // Substracting 5 from the data size is because the text string length + + // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. + off64_t location_length = chunk_data_size - 5; + if (location_length >= (off64_t) sizeof(buffer)) { + return ERROR_MALFORMED; + } + + if (mDataSource->readAt( + data_offset + 4, buffer, location_length) < location_length) { + return ERROR_IO; + } + + buffer[location_length] = '\0'; + mFileMetaData->setCString(kKeyLocation, buffer); + *offset += chunk_size; + break; + } + + case FOURCC('e', 's', 'd', 's'): + { + if (chunk_data_size < 4) { + return ERROR_MALFORMED; + } + + uint8_t buffer[256]; + if (chunk_data_size > (off64_t)sizeof(buffer)) { + return ERROR_BUFFER_TOO_SMALL; + } + + if (mDataSource->readAt( + data_offset, buffer, chunk_data_size) < chunk_data_size) { + return ERROR_IO; + } + + if (U32_AT(buffer) != 0) { + // Should be version 0, flags 0. + return ERROR_MALFORMED; + } + + mLastTrack->meta->setData( + kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); + + if (mPath.size() >= 2 + && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { + // Information from the ESDS must be relied on for proper + // setup of sample rate and channel count for MPEG4 Audio. + // The generic header appears to only contain generic + // information... + + status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( + &buffer[4], chunk_data_size - 4); + + if (err != OK) { + return err; + } + } + + *offset += chunk_size; + break; + } + + case FOURCC('a', 'v', 'c', 'C'): + { + char buffer[256]; + if (chunk_data_size > (off64_t)sizeof(buffer)) { + return ERROR_BUFFER_TOO_SMALL; + } + + if (mDataSource->readAt( + data_offset, buffer, chunk_data_size) < chunk_data_size) { + return ERROR_IO; + } + + mLastTrack->meta->setData( + kKeyAVCC, kTypeAVCC, buffer, chunk_data_size); + + *offset += chunk_size; + break; + } + + case FOURCC('d', '2', '6', '3'): + { + /* + * d263 contains a fixed 7 bytes part: + * vendor - 4 bytes + * version - 1 byte + * level - 1 byte + * profile - 1 byte + * optionally, "d263" box itself may contain a 16-byte + * bit rate box (bitr) + * average bit rate - 4 bytes + * max bit rate - 4 bytes + */ + char buffer[23]; + if (chunk_data_size != 7 && + chunk_data_size != 23) { + ALOGE("Incorrect D263 box size %lld", chunk_data_size); + return ERROR_MALFORMED; + } + + if (mDataSource->readAt( + data_offset, buffer, chunk_data_size) < chunk_data_size) { + return ERROR_IO; + } + + mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); + + *offset += chunk_size; + break; + } + + case FOURCC('m', 'e', 't', 'a'): + { + uint8_t buffer[4]; + if (chunk_data_size < (off64_t)sizeof(buffer)) { + return ERROR_MALFORMED; + } + + if (mDataSource->readAt( + data_offset, buffer, 4) < 4) { + return ERROR_IO; + } + + if (U32_AT(buffer) != 0) { + // Should be version 0, flags 0. + + // If it's not, let's assume this is one of those + // apparently malformed chunks that don't have flags + // and completely different semantics than what's + // in the MPEG4 specs and skip it. + *offset += chunk_size; + return OK; + } + + off64_t stop_offset = *offset + chunk_size; + *offset = data_offset + sizeof(buffer); + while (*offset < stop_offset) { + status_t err = parseChunk(offset, depth + 1); + if (err != OK) { + return err; + } + } + + if (*offset != stop_offset) { + return ERROR_MALFORMED; + } + break; + } + + case FOURCC('m', 'e', 'a', 'n'): + case FOURCC('n', 'a', 'm', 'e'): + case FOURCC('d', 'a', 't', 'a'): + { + if (mPath.size() == 6 && underMetaDataPath(mPath)) { + status_t err = parseMetaData(data_offset, chunk_data_size); + + if (err != OK) { + return err; + } + } + + *offset += chunk_size; + break; + } + + case FOURCC('m', 'v', 'h', 'd'): + { + if (chunk_data_size < 12) { + return ERROR_MALFORMED; + } + + uint8_t header[12]; + if (mDataSource->readAt( + data_offset, header, sizeof(header)) + < (ssize_t)sizeof(header)) { + return ERROR_IO; + } + + int64_t creationTime; + if (header[0] == 1) { + creationTime = U64_AT(&header[4]); + } else if (header[0] != 0) { + return ERROR_MALFORMED; + } else { + creationTime = U32_AT(&header[4]); + } + + String8 s; + convertTimeToDate(creationTime, &s); + + mFileMetaData->setCString(kKeyDate, s.string()); + + *offset += chunk_size; + break; + } + + case FOURCC('m', 'd', 'a', 't'): + { + if (!mIsDrm) { + *offset += chunk_size; + break; + } + + if (chunk_size < 8) { + return ERROR_MALFORMED; + } + + return parseDrmSINF(offset, data_offset); + } + + case FOURCC('h', 'd', 'l', 'r'): + { + uint32_t buffer; + if (mDataSource->readAt( + data_offset + 8, &buffer, 4) < 4) { + return ERROR_IO; + } + + uint32_t type = ntohl(buffer); + // For the 3GPP file format, the handler-type within the 'hdlr' box + // shall be 'text'. We also want to support 'sbtl' handler type + // for a practical reason as various MPEG4 containers use it. + if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { + mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); + } + + *offset += chunk_size; + break; + } + + case FOURCC('t', 'x', '3', 'g'): + { + uint32_t type; + const void *data; + size_t size = 0; + if (!mLastTrack->meta->findData( + kKeyTextFormatData, &type, &data, &size)) { + size = 0; + } + + uint8_t *buffer = new uint8_t[size + chunk_size]; + + if (size > 0) { + memcpy(buffer, data, size); + } + + if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) + < chunk_size) { + delete[] buffer; + buffer = NULL; + + return ERROR_IO; + } + + mLastTrack->meta->setData( + kKeyTextFormatData, 0, buffer, size + chunk_size); + + delete[] buffer; + + *offset += chunk_size; + break; + } + + case FOURCC('c', 'o', 'v', 'r'): + { + if (mFileMetaData != NULL) { + ALOGV("chunk_data_size = %lld and data_offset = %lld", + chunk_data_size, data_offset); + uint8_t *buffer = new uint8_t[chunk_data_size + 1]; + if (mDataSource->readAt( + data_offset, buffer, chunk_data_size) != (ssize_t)chunk_data_size) { + delete[] buffer; + buffer = NULL; + + return ERROR_IO; + } + const int kSkipBytesOfDataBox = 16; + mFileMetaData->setData( + kKeyAlbumArt, MetaData::TYPE_NONE, + buffer + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); + } + + *offset += chunk_size; + break; + } + + case FOURCC('-', '-', '-', '-'): + { + mLastCommentMean.clear(); + mLastCommentName.clear(); + mLastCommentData.clear(); + *offset += chunk_size; + break; + } + + default: + { + *offset += chunk_size; + break; + } + } + + return OK; +} + +status_t MPEG4Extractor::parseTrackHeader( + off64_t data_offset, off64_t data_size) { + if (data_size < 4) { + return ERROR_MALFORMED; + } + + uint8_t version; + if (mDataSource->readAt(data_offset, &version, 1) < 1) { + return ERROR_IO; + } + + size_t dynSize = (version == 1) ? 36 : 24; + + uint8_t buffer[36 + 60]; + + if (data_size != (off64_t)dynSize + 60) { + return ERROR_MALFORMED; + } + + if (mDataSource->readAt( + data_offset, buffer, data_size) < (ssize_t)data_size) { + return ERROR_IO; + } + + uint64_t ctime, mtime, duration; + int32_t id; + + if (version == 1) { + ctime = U64_AT(&buffer[4]); + mtime = U64_AT(&buffer[12]); + id = U32_AT(&buffer[20]); + duration = U64_AT(&buffer[28]); + } else { + CHECK_EQ((unsigned)version, 0u); + + ctime = U32_AT(&buffer[4]); + mtime = U32_AT(&buffer[8]); + id = U32_AT(&buffer[12]); + duration = U32_AT(&buffer[20]); + } + + mLastTrack->meta->setInt32(kKeyTrackID, id); + + size_t matrixOffset = dynSize + 16; + int32_t a00 = U32_AT(&buffer[matrixOffset]); + int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); + int32_t dx = U32_AT(&buffer[matrixOffset + 8]); + int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); + int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); + int32_t dy = U32_AT(&buffer[matrixOffset + 20]); + +#if 0 + ALOGI("x' = %.2f * x + %.2f * y + %.2f", + a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); + ALOGI("y' = %.2f * x + %.2f * y + %.2f", + a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); +#endif + + uint32_t rotationDegrees; + + static const int32_t kFixedOne = 0x10000; + if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { + // Identity, no rotation + rotationDegrees = 0; + } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { + rotationDegrees = 90; + } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { + rotationDegrees = 270; + } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { + rotationDegrees = 180; + } else { + ALOGW("We only support 0,90,180,270 degree rotation matrices"); + rotationDegrees = 0; + } + + if (rotationDegrees != 0) { + mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); + } + + // Handle presentation display size, which could be different + // from the image size indicated by kKeyWidth and kKeyHeight. + uint32_t width = U32_AT(&buffer[dynSize + 52]); + uint32_t height = U32_AT(&buffer[dynSize + 56]); + mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); + mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); + + return OK; +} + +status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) { + if (size < 4) { + return ERROR_MALFORMED; + } + + uint8_t *buffer = new uint8_t[size + 1]; + if (mDataSource->readAt( + offset, buffer, size) != (ssize_t)size) { + delete[] buffer; + buffer = NULL; + + return ERROR_IO; + } + + uint32_t flags = U32_AT(buffer); + + uint32_t metadataKey = 0; + char chunk[5]; + MakeFourCCString(mPath[4], chunk); + ALOGV("meta: %s @ %lld", chunk, offset); + switch (mPath[4]) { + case FOURCC(0xa9, 'a', 'l', 'b'): + { + metadataKey = kKeyAlbum; + break; + } + case FOURCC(0xa9, 'A', 'R', 'T'): + { + metadataKey = kKeyArtist; + break; + } + case FOURCC('a', 'A', 'R', 'T'): + { + metadataKey = kKeyAlbumArtist; + break; + } + case FOURCC(0xa9, 'd', 'a', 'y'): + { + metadataKey = kKeyYear; + break; + } + case FOURCC(0xa9, 'n', 'a', 'm'): + { + metadataKey = kKeyTitle; + break; + } + case FOURCC(0xa9, 'w', 'r', 't'): + { + metadataKey = kKeyWriter; + break; + } + case FOURCC('c', 'o', 'v', 'r'): + { + metadataKey = kKeyAlbumArt; + break; + } + case FOURCC('g', 'n', 'r', 'e'): + { + metadataKey = kKeyGenre; + break; + } + case FOURCC(0xa9, 'g', 'e', 'n'): + { + metadataKey = kKeyGenre; + break; + } + case FOURCC('c', 'p', 'i', 'l'): + { + if (size == 9 && flags == 21) { + char tmp[16]; + sprintf(tmp, "%d", + (int)buffer[size - 1]); + + mFileMetaData->setCString(kKeyCompilation, tmp); + } + break; + } + case FOURCC('t', 'r', 'k', 'n'): + { + if (size == 16 && flags == 0) { + char tmp[16]; + sprintf(tmp, "%d/%d", + (int)buffer[size - 5], (int)buffer[size - 3]); + + mFileMetaData->setCString(kKeyCDTrackNumber, tmp); + } + break; + } + case FOURCC('d', 'i', 's', 'k'): + { + if (size == 14 && flags == 0) { + char tmp[16]; + sprintf(tmp, "%d/%d", + (int)buffer[size - 3], (int)buffer[size - 1]); + + mFileMetaData->setCString(kKeyDiscNumber, tmp); + } + break; + } + case FOURCC('-', '-', '-', '-'): + { + buffer[size] = '\0'; + switch (mPath[5]) { + case FOURCC('m', 'e', 'a', 'n'): + mLastCommentMean.setTo((const char *)buffer + 4); + break; + case FOURCC('n', 'a', 'm', 'e'): + mLastCommentName.setTo((const char *)buffer + 4); + break; + case FOURCC('d', 'a', 't', 'a'): + mLastCommentData.setTo((const char *)buffer + 8); + break; + } + if (mLastCommentMean == "com.apple.iTunes" + && mLastCommentName == "iTunSMPB" + && mLastCommentData.length() != 0) { + int32_t delay, padding; + if (sscanf(mLastCommentData, + " %*x %x %x %*x", &delay, &padding) == 2) { + mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); + mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); + } + mLastCommentMean.clear(); + mLastCommentName.clear(); + mLastCommentData.clear(); + } + break; + } + + default: + break; + } + + if (size >= 8 && metadataKey) { + if (metadataKey == kKeyAlbumArt) { + mFileMetaData->setData( + kKeyAlbumArt, MetaData::TYPE_NONE, + buffer + 8, size - 8); + } else if (metadataKey == kKeyGenre) { + if (flags == 0) { + // uint8_t genre code, iTunes genre codes are + // the standard id3 codes, except they start + // at 1 instead of 0 (e.g. Pop is 14, not 13) + // We use standard id3 numbering, so subtract 1. + int genrecode = (int)buffer[size - 1]; + genrecode--; + if (genrecode < 0) { + genrecode = 255; // reserved for 'unknown genre' + } + char genre[10]; + sprintf(genre, "%d", genrecode); + + mFileMetaData->setCString(metadataKey, genre); + } else if (flags == 1) { + // custom genre string + buffer[size] = '\0'; + + mFileMetaData->setCString( + metadataKey, (const char *)buffer + 8); + } + } else { + buffer[size] = '\0'; + + mFileMetaData->setCString( + metadataKey, (const char *)buffer + 8); + } + } + + delete[] buffer; + buffer = NULL; + + return OK; +} + +sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { + status_t err; + if ((err = readMetaData()) != OK) { + return NULL; + } + + Track *track = mFirstTrack; + while (index > 0) { + if (track == NULL) { + return NULL; + } + + track = track->next; + --index; + } + + if (track == NULL) { + return NULL; + } + + return new MPEG4Source( + track->meta, mDataSource, track->timescale, track->sampleTable); +} + +// static +status_t MPEG4Extractor::verifyTrack(Track *track) { + const char *mime; + CHECK(track->meta->findCString(kKeyMIMEType, &mime)); + + uint32_t type; + const void *data; + size_t size; + if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { + if (!track->meta->findData(kKeyAVCC, &type, &data, &size) + || type != kTypeAVCC) { + return ERROR_MALFORMED; + } + } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) + || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { + if (!track->meta->findData(kKeyESDS, &type, &data, &size) + || type != kTypeESDS) { + return ERROR_MALFORMED; + } + } + + if (!track->sampleTable->isValid()) { + // Make sure we have all the metadata we need. + return ERROR_MALFORMED; + } + + return OK; +} + +status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( + const void *esds_data, size_t esds_size) { + ESDS esds(esds_data, esds_size); + + uint8_t objectTypeIndication; + if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { + return ERROR_MALFORMED; + } + + if (objectTypeIndication == 0xe1) { + // This isn't MPEG4 audio at all, it's QCELP 14k... + mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); + return OK; + } + + if (objectTypeIndication == 0x6b) { + // The media subtype is MP3 audio + // Our software MP3 audio decoder may not be able to handle + // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED + ALOGE("MP3 track in MP4/3GPP file is not supported"); + return ERROR_UNSUPPORTED; + } + + const uint8_t *csd; + size_t csd_size; + if (esds.getCodecSpecificInfo( + (const void **)&csd, &csd_size) != OK) { + return ERROR_MALFORMED; + } + +#if 0 + printf("ESD of size %d\n", csd_size); + hexdump(csd, csd_size); +#endif + + if (csd_size == 0) { + // There's no further information, i.e. no codec specific data + // Let's assume that the information provided in the mpeg4 headers + // is accurate and hope for the best. + + return OK; + } + + if (csd_size < 2) { + return ERROR_MALFORMED; + } + + uint32_t objectType = csd[0] >> 3; + + if (objectType == 31) { + return ERROR_UNSUPPORTED; + } + + uint32_t freqIndex = (csd[0] & 7) << 1 | (csd[1] >> 7); + int32_t sampleRate = 0; + int32_t numChannels = 0; + if (freqIndex == 15) { + if (csd_size < 5) { + return ERROR_MALFORMED; + } + + sampleRate = (csd[1] & 0x7f) << 17 + | csd[2] << 9 + | csd[3] << 1 + | (csd[4] >> 7); + + numChannels = (csd[4] >> 3) & 15; + } else { + static uint32_t kSamplingRate[] = { + 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, + 16000, 12000, 11025, 8000, 7350 + }; + + if (freqIndex == 13 || freqIndex == 14) { + return ERROR_MALFORMED; + } + + sampleRate = kSamplingRate[freqIndex]; + numChannels = (csd[1] >> 3) & 15; + } + + if (numChannels == 0) { + return ERROR_UNSUPPORTED; + } + + int32_t prevSampleRate; + CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); + + if (prevSampleRate != sampleRate) { + ALOGV("mpeg4 audio sample rate different from previous setting. " + "was: %d, now: %d", prevSampleRate, sampleRate); + } + + mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); + + int32_t prevChannelCount; + CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); + + if (prevChannelCount != numChannels) { + ALOGV("mpeg4 audio channel count different from previous setting. " + "was: %d, now: %d", prevChannelCount, numChannels); + } + + mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); + + return OK; +} + +//////////////////////////////////////////////////////////////////////////////// + +MPEG4Source::MPEG4Source( + const sp<MetaData> &format, + const sp<DataSource> &dataSource, + int32_t timeScale, + const sp<SampleTable> &sampleTable) + : mFormat(format), + mDataSource(dataSource), + mTimescale(timeScale), + mSampleTable(sampleTable), + mCurrentSampleIndex(0), + mIsAVC(false), + mNALLengthSize(0), + mStarted(false), + mGroup(NULL), + mBuffer(NULL), + mWantsNALFragments(false), + mSrcBuffer(NULL) { + const char *mime; + bool success = mFormat->findCString(kKeyMIMEType, &mime); + CHECK(success); + + mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); + + if (mIsAVC) { + uint32_t type; + const void *data; + size_t size; + CHECK(format->findData(kKeyAVCC, &type, &data, &size)); + + const uint8_t *ptr = (const uint8_t *)data; + + CHECK(size >= 7); + CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 + + // The number of bytes used to encode the length of a NAL unit. + mNALLengthSize = 1 + (ptr[4] & 3); + } +} + +MPEG4Source::~MPEG4Source() { + if (mStarted) { + stop(); + } +} + +status_t MPEG4Source::start(MetaData *params) { + Mutex::Autolock autoLock(mLock); + + CHECK(!mStarted); + + int32_t val; + if (params && params->findInt32(kKeyWantsNALFragments, &val) + && val != 0) { + mWantsNALFragments = true; + } else { + mWantsNALFragments = false; + } + + mGroup = new MediaBufferGroup; + + int32_t max_size; + CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); + + mGroup->add_buffer(new MediaBuffer(max_size)); + + mSrcBuffer = new uint8_t[max_size]; + + mStarted = true; + + return OK; +} + +status_t MPEG4Source::stop() { + Mutex::Autolock autoLock(mLock); + + CHECK(mStarted); + + if (mBuffer != NULL) { + mBuffer->release(); + mBuffer = NULL; + } + + delete[] mSrcBuffer; + mSrcBuffer = NULL; + + delete mGroup; + mGroup = NULL; + + mStarted = false; + mCurrentSampleIndex = 0; + + return OK; +} + +sp<MetaData> MPEG4Source::getFormat() { + Mutex::Autolock autoLock(mLock); + + return mFormat; +} + +size_t MPEG4Source::parseNALSize(const uint8_t *data) const { + switch (mNALLengthSize) { + case 1: + return *data; + case 2: + return U16_AT(data); + case 3: + return ((size_t)data[0] << 16) | U16_AT(&data[1]); + case 4: + return U32_AT(data); + } + + // This cannot happen, mNALLengthSize springs to life by adding 1 to + // a 2-bit integer. + CHECK(!"Should not be here."); + + return 0; +} + +status_t MPEG4Source::read( + MediaBuffer **out, const ReadOptions *options) { + Mutex::Autolock autoLock(mLock); + + CHECK(mStarted); + + *out = NULL; + + int64_t targetSampleTimeUs = -1; + + int64_t seekTimeUs; + ReadOptions::SeekMode mode; + if (options && options->getSeekTo(&seekTimeUs, &mode)) { + uint32_t findFlags = 0; + switch (mode) { + case ReadOptions::SEEK_PREVIOUS_SYNC: + findFlags = SampleTable::kFlagBefore; + break; + case ReadOptions::SEEK_NEXT_SYNC: + findFlags = SampleTable::kFlagAfter; + break; + case ReadOptions::SEEK_CLOSEST_SYNC: + case ReadOptions::SEEK_CLOSEST: + findFlags = SampleTable::kFlagClosest; + break; + default: + CHECK(!"Should not be here."); + break; + } + + uint32_t sampleIndex; + status_t err = mSampleTable->findSampleAtTime( + seekTimeUs * mTimescale / 1000000, + &sampleIndex, findFlags); + + if (mode == ReadOptions::SEEK_CLOSEST) { + // We found the closest sample already, now we want the sync + // sample preceding it (or the sample itself of course), even + // if the subsequent sync sample is closer. + findFlags = SampleTable::kFlagBefore; + } + + uint32_t syncSampleIndex; + if (err == OK) { + err = mSampleTable->findSyncSampleNear( + sampleIndex, &syncSampleIndex, findFlags); + } + + uint32_t sampleTime; + if (err == OK) { + err = mSampleTable->getMetaDataForSample( + sampleIndex, NULL, NULL, &sampleTime); + } + + if (err != OK) { + if (err == ERROR_OUT_OF_RANGE) { + // An attempt to seek past the end of the stream would + // normally cause this ERROR_OUT_OF_RANGE error. Propagating + // this all the way to the MediaPlayer would cause abnormal + // termination. Legacy behaviour appears to be to behave as if + // we had seeked to the end of stream, ending normally. + err = ERROR_END_OF_STREAM; + } + return err; + } + + if (mode == ReadOptions::SEEK_CLOSEST) { + targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; + } + +#if 0 + uint32_t syncSampleTime; + CHECK_EQ(OK, mSampleTable->getMetaDataForSample( + syncSampleIndex, NULL, NULL, &syncSampleTime)); + + ALOGI("seek to time %lld us => sample at time %lld us, " + "sync sample at time %lld us", + seekTimeUs, + sampleTime * 1000000ll / mTimescale, + syncSampleTime * 1000000ll / mTimescale); +#endif + + mCurrentSampleIndex = syncSampleIndex; + if (mBuffer != NULL) { + mBuffer->release(); + mBuffer = NULL; + } + + // fall through + } + + off64_t offset; + size_t size; + uint32_t cts; + bool isSyncSample; + bool newBuffer = false; + if (mBuffer == NULL) { + newBuffer = true; + + status_t err = + mSampleTable->getMetaDataForSample( + mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample); + + if (err != OK) { + return err; + } + + err = mGroup->acquire_buffer(&mBuffer); + + if (err != OK) { + CHECK(mBuffer == NULL); + return err; + } + } + + if (!mIsAVC || mWantsNALFragments) { + if (newBuffer) { + ssize_t num_bytes_read = + mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); + + if (num_bytes_read < (ssize_t)size) { + mBuffer->release(); + mBuffer = NULL; + + return ERROR_IO; + } + + CHECK(mBuffer != NULL); + mBuffer->set_range(0, size); + mBuffer->meta_data()->clear(); + mBuffer->meta_data()->setInt64( + kKeyTime, ((int64_t)cts * 1000000) / mTimescale); + + if (targetSampleTimeUs >= 0) { + mBuffer->meta_data()->setInt64( + kKeyTargetTime, targetSampleTimeUs); + } + + if (isSyncSample) { + mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); + } + + ++mCurrentSampleIndex; + } + + if (!mIsAVC) { + *out = mBuffer; + mBuffer = NULL; + + return OK; + } + + // Each NAL unit is split up into its constituent fragments and + // each one of them returned in its own buffer. + + CHECK(mBuffer->range_length() >= mNALLengthSize); + + const uint8_t *src = + (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); + + size_t nal_size = parseNALSize(src); + if (mBuffer->range_length() < mNALLengthSize + nal_size) { + ALOGE("incomplete NAL unit."); + + mBuffer->release(); + mBuffer = NULL; + + return ERROR_MALFORMED; + } + + MediaBuffer *clone = mBuffer->clone(); + CHECK(clone != NULL); + clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); + + CHECK(mBuffer != NULL); + mBuffer->set_range( + mBuffer->range_offset() + mNALLengthSize + nal_size, + mBuffer->range_length() - mNALLengthSize - nal_size); + + if (mBuffer->range_length() == 0) { + mBuffer->release(); + mBuffer = NULL; + } + + *out = clone; + + return OK; + } else { + // Whole NAL units are returned but each fragment is prefixed by + // the start code (0x00 00 00 01). + ssize_t num_bytes_read = 0; + int32_t drm = 0; + bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); + if (usesDRM) { + num_bytes_read = + mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); + } else { + num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); + } + + if (num_bytes_read < (ssize_t)size) { + mBuffer->release(); + mBuffer = NULL; + + return ERROR_IO; + } + + if (usesDRM) { + CHECK(mBuffer != NULL); + mBuffer->set_range(0, size); + + } else { + uint8_t *dstData = (uint8_t *)mBuffer->data(); + size_t srcOffset = 0; + size_t dstOffset = 0; + + while (srcOffset < size) { + bool isMalFormed = (srcOffset + mNALLengthSize > size); + size_t nalLength = 0; + if (!isMalFormed) { + nalLength = parseNALSize(&mSrcBuffer[srcOffset]); + srcOffset += mNALLengthSize; + isMalFormed = srcOffset + nalLength > size; + } + + if (isMalFormed) { + ALOGE("Video is malformed"); + mBuffer->release(); + mBuffer = NULL; + return ERROR_MALFORMED; + } + + if (nalLength == 0) { + continue; + } + + CHECK(dstOffset + 4 <= mBuffer->size()); + + dstData[dstOffset++] = 0; + dstData[dstOffset++] = 0; + dstData[dstOffset++] = 0; + dstData[dstOffset++] = 1; + memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); + srcOffset += nalLength; + dstOffset += nalLength; + } + CHECK_EQ(srcOffset, size); + CHECK(mBuffer != NULL); + mBuffer->set_range(0, dstOffset); + } + + mBuffer->meta_data()->clear(); + mBuffer->meta_data()->setInt64( + kKeyTime, ((int64_t)cts * 1000000) / mTimescale); + + if (targetSampleTimeUs >= 0) { + mBuffer->meta_data()->setInt64( + kKeyTargetTime, targetSampleTimeUs); + } + + if (isSyncSample) { + mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); + } + + ++mCurrentSampleIndex; + + *out = mBuffer; + mBuffer = NULL; + + return OK; + } +} + +MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( + const char *mimePrefix) { + for (Track *track = mFirstTrack; track != NULL; track = track->next) { + const char *mime; + if (track->meta != NULL + && track->meta->findCString(kKeyMIMEType, &mime) + && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { + return track; + } + } + + return NULL; +} + +static bool LegacySniffMPEG4( + const sp<DataSource> &source, String8 *mimeType, float *confidence) { + uint8_t header[8]; + + ssize_t n = source->readAt(4, header, sizeof(header)); + if (n < (ssize_t)sizeof(header)) { + return false; + } + + if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) + || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) + || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) + || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) + || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) + || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { + *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; + *confidence = 0.4; + + return true; + } + + return false; +} + +static bool isCompatibleBrand(uint32_t fourcc) { + static const uint32_t kCompatibleBrands[] = { + FOURCC('i', 's', 'o', 'm'), + FOURCC('i', 's', 'o', '2'), + FOURCC('a', 'v', 'c', '1'), + FOURCC('3', 'g', 'p', '4'), + FOURCC('m', 'p', '4', '1'), + FOURCC('m', 'p', '4', '2'), + + // Won't promise that the following file types can be played. + // Just give these file types a chance. + FOURCC('q', 't', ' ', ' '), // Apple's QuickTime + FOURCC('M', 'S', 'N', 'V'), // Sony's PSP + + FOURCC('3', 'g', '2', 'a'), // 3GPP2 + FOURCC('3', 'g', '2', 'b'), + }; + + for (size_t i = 0; + i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); + ++i) { + if (kCompatibleBrands[i] == fourcc) { + return true; + } + } + + return false; +} + +// Attempt to actually parse the 'ftyp' atom and determine if a suitable +// compatible brand is present. +// Also try to identify where this file's metadata ends +// (end of the 'moov' atom) and report it to the caller as part of +// the metadata. +static bool BetterSniffMPEG4( + const sp<DataSource> &source, String8 *mimeType, float *confidence, + sp<AMessage> *meta) { + // We scan up to 128 bytes to identify this file as an MP4. + static const off64_t kMaxScanOffset = 128ll; + + off64_t offset = 0ll; + bool foundGoodFileType = false; + off64_t moovAtomEndOffset = -1ll; + bool done = false; + + while (!done && offset < kMaxScanOffset) { + uint32_t hdr[2]; + if (source->readAt(offset, hdr, 8) < 8) { + return false; + } + + uint64_t chunkSize = ntohl(hdr[0]); + uint32_t chunkType = ntohl(hdr[1]); + off64_t chunkDataOffset = offset + 8; + + if (chunkSize == 1) { + if (source->readAt(offset + 8, &chunkSize, 8) < 8) { + return false; + } + + chunkSize = ntoh64(chunkSize); + chunkDataOffset += 8; + + if (chunkSize < 16) { + // The smallest valid chunk is 16 bytes long in this case. + return false; + } + } else if (chunkSize < 8) { + // The smallest valid chunk is 8 bytes long. + return false; + } + + off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; + + switch (chunkType) { + case FOURCC('f', 't', 'y', 'p'): + { + if (chunkDataSize < 8) { + return false; + } + + uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; + for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { + if (i == 1) { + // Skip this index, it refers to the minorVersion, + // not a brand. + continue; + } + + uint32_t brand; + if (source->readAt( + chunkDataOffset + 4 * i, &brand, 4) < 4) { + return false; + } + + brand = ntohl(brand); + + if (isCompatibleBrand(brand)) { + foundGoodFileType = true; + break; + } + } + + if (!foundGoodFileType) { + return false; + } + + break; + } + + case FOURCC('m', 'o', 'o', 'v'): + { + moovAtomEndOffset = offset + chunkSize; + + done = true; + break; + } + + default: + break; + } + + offset += chunkSize; + } + + if (!foundGoodFileType) { + return false; + } + + *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; + *confidence = 0.4f; + + if (moovAtomEndOffset >= 0) { + *meta = new AMessage; + (*meta)->setInt64("meta-data-size", moovAtomEndOffset); + + ALOGV("found metadata size: %lld", moovAtomEndOffset); + } + + return true; +} + +bool SniffMPEG4( + const sp<DataSource> &source, String8 *mimeType, float *confidence, + sp<AMessage> *meta) { + if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { + return true; + } + + if (LegacySniffMPEG4(source, mimeType, confidence)) { + ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); + return true; + } + + return false; +} + +} // namespace android |