1 files changed, 513 insertions, 93 deletions
diff --git a/media/libstagefright/MPEG4Extractor.cpp b/media/libstagefright/MPEG4Extractor.cpp
index e9ecc45..e09ecc2 100644
--- a/media/libstagefright/MPEG4Extractor.cpp
+++ b/media/libstagefright/MPEG4Extractor.cpp
@@ -16,17 +16,19 @@
 
 //#define LOG_NDEBUG 0
 #define LOG_TAG "MPEG4Extractor"
-#include <utils/Log.h>
-
-#include "include/MPEG4Extractor.h"
-#include "include/SampleTable.h"
-#include "include/ESDS.h"
 
 #include <ctype.h>
+#include <inttypes.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
+#include <utils/Log.h>
+
+#include "include/MPEG4Extractor.h"
+#include "include/SampleTable.h"
+#include "include/ESDS.h"
+
 #include <media/stagefright/foundation/ABitReader.h>
 #include <media/stagefright/foundation/ABuffer.h>
 #include <media/stagefright/foundation/ADebug.h>
@@ -51,11 +53,13 @@ namespace android {
 class MPEG4Source : public MediaSource {
 public:
     // Caller retains ownership of both "dataSource" and "sampleTable".
-    MPEG4Source(const sp<MetaData> &format,
+    MPEG4Source(const sp<MPEG4Extractor> &owner,
+                const sp<MetaData> &format,
                 const sp<DataSource> &dataSource,
                 int32_t timeScale,
                 const sp<SampleTable> &sampleTable,
                 Vector<SidxEntry> &sidx,
+                const Trex *trex,
                 off64_t firstMoofOffset);
 
     virtual status_t start(MetaData *params = NULL);
@@ -72,6 +76,8 @@ protected:
 private:
     Mutex mLock;
 
+    // keep the MPEG4Extractor around, since we're referencing its data
+    sp<MPEG4Extractor> mOwner;
     sp<MetaData> mFormat;
     sp<DataSource> mDataSource;
     int32_t mTimescale;
@@ -79,6 +85,7 @@ private:
     uint32_t mCurrentSampleIndex;
     uint32_t mCurrentFragmentIndex;
     Vector<SidxEntry> &mSegments;
+    const Trex *mTrex;
     off64_t mFirstMoofOffset;
     off64_t mCurrentMoofOffset;
     off64_t mNextMoofOffset;
@@ -100,6 +107,7 @@ private:
     uint64_t* mCurrentSampleInfoOffsets;
 
     bool mIsAVC;
+    bool mIsHEVC;
     size_t mNALLengthSize;
 
     bool mStarted;
@@ -145,6 +153,7 @@ private:
         off64_t offset;
         size_t size;
         uint32_t duration;
+        int32_t compositionOffset;
         uint8_t iv[16];
         Vector<size_t> clearsizes;
         Vector<size_t> encryptedsizes;
@@ -264,7 +273,7 @@ static void hexdump(const void *_data, size_t size) {
     const uint8_t *data = (const uint8_t *)_data;
     size_t offset = 0;
     while (offset < size) {
-        printf("0x%04x  ", offset);
+        printf("0x%04zx  ", offset);
 
         size_t n = size - offset;
         if (n > 16) {
@@ -321,6 +330,9 @@ static const char *FourCC2MIME(uint32_t fourcc) {
         case FOURCC('a', 'v', 'c', '1'):
             return MEDIA_MIMETYPE_VIDEO_AVC;
 
+        case FOURCC('h', 'v', 'c', '1'):
+        case FOURCC('h', 'e', 'v', '1'):
+            return MEDIA_MIMETYPE_VIDEO_HEVC;
         default:
             CHECK(!"should not be here.");
             return NULL;
@@ -343,8 +355,7 @@ static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t
 }
 
 MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
-    : mSidxDuration(0),
-      mMoofOffset(0),
+    : mMoofOffset(0),
       mDataSource(source),
       mInitCheck(NO_INIT),
       mHasVideo(false),
@@ -409,7 +420,7 @@ size_t MPEG4Extractor::countTracks() {
         track = track->next;
     }
 
-    ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
+    ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
     return n;
 }
 
@@ -482,8 +493,18 @@ status_t MPEG4Extractor::readMetaData() {
     off64_t offset = 0;
     status_t err;
     while (true) {
+        off64_t orig_offset = offset;
         err = parseChunk(&offset, 0);
-        if (err == OK) {
+
+        if (err != OK && err != UNKNOWN_ERROR) {
+            break;
+        } else if (offset <= orig_offset) {
+            // only continue parsing if the offset was advanced,
+            // otherwise we might end up in an infinite loop
+            ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset);
+            err = ERROR_MALFORMED;
+            break;
+        } else if (err == OK) {
             continue;
         }
 
@@ -492,12 +513,12 @@ status_t MPEG4Extractor::readMetaData() {
             break;
         }
         uint32_t chunk_type = ntohl(hdr[1]);
-        if (chunk_type == FOURCC('s', 'i', 'd', 'x')) {
-            // parse the sidx box too
-            continue;
-        } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
+        if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
             // store the offset of the first segment
             mMoofOffset = offset;
+        } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
+            // keep parsing until we get to the data
+            continue;
         }
         break;
     }
@@ -509,8 +530,6 @@ status_t MPEG4Extractor::readMetaData() {
         } else {
             mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
         }
-
-        mInitCheck = OK;
     } else {
         mInitCheck = err;
     }
@@ -575,7 +594,8 @@ static int32_t readSize(off64_t offset,
     return size;
 }
 
-status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) {
+status_t MPEG4Extractor::parseDrmSINF(
+        off64_t * /* offset */, off64_t data_offset) {
     uint8_t updateIdTag;
     if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
         return ERROR_IO;
@@ -764,8 +784,25 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             // The smallest valid chunk is 16 bytes long in this case.
             return ERROR_MALFORMED;
         }
+    } else if (chunk_size == 0) {
+        if (depth == 0) {
+            // atom extends to end of file
+            off64_t sourceSize;
+            if (mDataSource->getSize(&sourceSize) == OK) {
+                chunk_size = (sourceSize - *offset);
+            } else {
+                // XXX could we just pick a "sufficiently large" value here?
+                ALOGE("atom size is 0, and data source has no size");
+                return ERROR_MALFORMED;
+            }
+        } else {
+            // not allowed for non-toplevel atoms, skip it
+            *offset += 4;
+            return OK;
+        }
     } else if (chunk_size < 8) {
         // The smallest valid chunk is 8 bytes long.
+        ALOGE("invalid chunk size: %" PRIu64, chunk_size);
         return ERROR_MALFORMED;
     }
 
@@ -776,7 +813,7 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 #if 0
     static const char kWhitespace[] = "                                        ";
     const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
-    printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
+    printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
 
     char buffer[256];
     size_t n = chunk_size;
@@ -832,7 +869,7 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
         case FOURCC('e', 'd', 't', 's'):
         {
             if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
-                ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
+                ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
 
                 if (mDataSource->flags()
                         & (DataSource::kWantsPrefetching
@@ -919,6 +956,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
         case FOURCC('e', 'l', 's', 't'):
         {
+            *offset += chunk_size;
+
             // See 14496-12 8.6.6
             uint8_t version;
             if (mDataSource->readAt(data_offset, &version, 1) < 1) {
@@ -981,12 +1020,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                     mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
                 }
             }
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('f', 'r', 'm', 'a'):
         {
+            *offset += chunk_size;
+
             uint32_t original_fourcc;
             if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
                 return ERROR_IO;
@@ -1000,12 +1040,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
                 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
             }
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('t', 'e', 'n', 'c'):
         {
+            *offset += chunk_size;
+
             if (chunk_size < 32) {
                 return ERROR_MALFORMED;
             }
@@ -1050,23 +1091,25 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
             mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
             mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('t', 'k', 'h', 'd'):
         {
+            *offset += chunk_size;
+
             status_t err;
             if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('p', 's', 's', 'h'):
         {
+            *offset += chunk_size;
+
             PsshInfo pssh;
 
             if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
@@ -1095,12 +1138,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             }
             mPssh.push_back(pssh);
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('m', 'd', 'h', 'd'):
         {
+            *offset += chunk_size;
+
             if (chunk_data_size < 4) {
                 return ERROR_MALFORMED;
             }
@@ -1131,6 +1175,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
             mLastTrack->timescale = ntohl(timescale);
 
+            // 14496-12 says all ones means indeterminate, but some files seem to use
+            // 0 instead. We treat both the same.
             int64_t duration = 0;
             if (version == 1) {
                 if (mDataSource->readAt(
@@ -1138,7 +1184,9 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                         < (ssize_t)sizeof(duration)) {
                     return ERROR_IO;
                 }
-                duration = ntoh64(duration);
+                if (duration != -1) {
+                    duration = ntoh64(duration);
+                }
             } else {
                 uint32_t duration32;
                 if (mDataSource->readAt(
@@ -1146,13 +1194,14 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                         < (ssize_t)sizeof(duration32)) {
                     return ERROR_IO;
                 }
-                // ffmpeg sets duration to -1, which is incorrect.
                 if (duration32 != 0xffffffff) {
                     duration = ntohl(duration32);
                 }
             }
-            mLastTrack->meta->setInt64(
-                    kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
+            if (duration != 0) {
+                mLastTrack->meta->setInt64(
+                        kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
+            }
 
             uint8_t lang[2];
             off64_t lang_offset;
@@ -1181,7 +1230,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             mLastTrack->meta->setCString(
                     kKeyMediaLanguage, lang_code);
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1291,6 +1339,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
         case FOURCC('H', '2', '6', '3'):
         case FOURCC('h', '2', '6', '3'):
         case FOURCC('a', 'v', 'c', '1'):
+        case FOURCC('h', 'v', 'c', '1'):
+        case FOURCC('h', 'e', 'v', '1'):
         {
             mHasVideo = true;
 
@@ -1348,11 +1398,12 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->sampleTable->setChunkOffsetParams(
                         chunk_type, data_offset, chunk_data_size);
 
+            *offset += chunk_size;
+
             if (err != OK) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1362,11 +1413,12 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->sampleTable->setSampleToChunkParams(
                         data_offset, chunk_data_size);
 
+            *offset += chunk_size;
+
             if (err != OK) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1377,6 +1429,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->sampleTable->setSampleSizeParams(
                         chunk_type, data_offset, chunk_data_size);
 
+            *offset += chunk_size;
+
             if (err != OK) {
                 return err;
             }
@@ -1429,7 +1483,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 }
                 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
             }
-            *offset += chunk_size;
 
             // NOTE: setting another piece of metadata invalidates any pointers (such as the
             // mimetype) previously obtained, so don't cache them.
@@ -1453,6 +1506,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
         case FOURCC('s', 't', 't', 's'):
         {
+            *offset += chunk_size;
+
             status_t err =
                 mLastTrack->sampleTable->setTimeToSampleParams(
                         data_offset, chunk_data_size);
@@ -1461,12 +1516,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('c', 't', 't', 's'):
         {
+            *offset += chunk_size;
+
             status_t err =
                 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
                         data_offset, chunk_data_size);
@@ -1475,12 +1531,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('s', 't', 's', 's'):
         {
+            *offset += chunk_size;
+
             status_t err =
                 mLastTrack->sampleTable->setSyncSampleParams(
                         data_offset, chunk_data_size);
@@ -1489,13 +1546,14 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         // @xyz
         case FOURCC('\xA9', 'x', 'y', 'z'):
         {
+            *offset += chunk_size;
+
             // Best case the total data length inside "@xyz" box
             // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
             // where "\x00\x04" is the text string length with value = 4,
@@ -1524,12 +1582,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
             buffer[location_length] = '\0';
             mFileMetaData->setCString(kKeyLocation, buffer);
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('e', 's', 'd', 's'):
         {
+            *offset += chunk_size;
+
             if (chunk_data_size < 4) {
                 return ERROR_MALFORMED;
             }
@@ -1567,12 +1626,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 }
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('a', 'v', 'c', 'C'):
         {
+            *offset += chunk_size;
+
             sp<ABuffer> buffer = new ABuffer(chunk_data_size);
 
             if (mDataSource->readAt(
@@ -1583,12 +1643,27 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             mLastTrack->meta->setData(
                     kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
 
+            break;
+        }
+        case FOURCC('h', 'v', 'c', 'C'):
+        {
+            sp<ABuffer> buffer = new ABuffer(chunk_data_size);
+
+            if (mDataSource->readAt(
+                        data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
+                return ERROR_IO;
+            }
+
+            mLastTrack->meta->setData(
+                    kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
+
             *offset += chunk_size;
             break;
         }
 
         case FOURCC('d', '2', '6', '3'):
         {
+            *offset += chunk_size;
             /*
              * d263 contains a fixed 7 bytes part:
              *   vendor - 4 bytes
@@ -1614,7 +1689,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
             mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1622,11 +1696,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
         {
             uint8_t buffer[4];
             if (chunk_data_size < (off64_t)sizeof(buffer)) {
+                *offset += chunk_size;
                 return ERROR_MALFORMED;
             }
 
             if (mDataSource->readAt(
                         data_offset, buffer, 4) < 4) {
+                *offset += chunk_size;
                 return ERROR_IO;
             }
 
@@ -1660,6 +1736,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
         case FOURCC('n', 'a', 'm', 'e'):
         case FOURCC('d', 'a', 't', 'a'):
         {
+            *offset += chunk_size;
+
             if (mPath.size() == 6 && underMetaDataPath(mPath)) {
                 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
 
@@ -1668,17 +1746,18 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 }
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('m', 'v', 'h', 'd'):
         {
-            if (chunk_data_size < 24) {
+            *offset += chunk_size;
+
+            if (chunk_data_size < 32) {
                 return ERROR_MALFORMED;
             }
 
-            uint8_t header[24];
+            uint8_t header[32];
             if (mDataSource->readAt(
                         data_offset, header, sizeof(header))
                     < (ssize_t)sizeof(header)) {
@@ -1686,14 +1765,27 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             }
 
             uint64_t creationTime;
+            uint64_t duration = 0;
             if (header[0] == 1) {
                 creationTime = U64_AT(&header[4]);
                 mHeaderTimescale = U32_AT(&header[20]);
+                duration = U64_AT(&header[24]);
+                if (duration == 0xffffffffffffffff) {
+                    duration = 0;
+                }
             } else if (header[0] != 0) {
                 return ERROR_MALFORMED;
             } else {
                 creationTime = U32_AT(&header[4]);
                 mHeaderTimescale = U32_AT(&header[12]);
+                uint32_t d32 = U32_AT(&header[16]);
+                if (d32 == 0xffffffff) {
+                    d32 = 0;
+                }
+                duration = d32;
+            }
+            if (duration != 0) {
+                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
             }
 
             String8 s;
@@ -1701,7 +1793,50 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
             mFileMetaData->setCString(kKeyDate, s.string());
 
+            break;
+        }
+
+        case FOURCC('m', 'e', 'h', 'd'):
+        {
             *offset += chunk_size;
+
+            if (chunk_data_size < 8) {
+                return ERROR_MALFORMED;
+            }
+
+            uint8_t flags[4];
+            if (mDataSource->readAt(
+                        data_offset, flags, sizeof(flags))
+                    < (ssize_t)sizeof(flags)) {
+                return ERROR_IO;
+            }
+
+            uint64_t duration = 0;
+            if (flags[0] == 1) {
+                // 64 bit
+                if (chunk_data_size < 12) {
+                    return ERROR_MALFORMED;
+                }
+                mDataSource->getUInt64(data_offset + 4, &duration);
+                if (duration == 0xffffffffffffffff) {
+                    duration = 0;
+                }
+            } else if (flags[0] == 0) {
+                // 32 bit
+                uint32_t d32;
+                mDataSource->getUInt32(data_offset + 4, &d32);
+                if (d32 == 0xffffffff) {
+                    d32 = 0;
+                }
+                duration = d32;
+            } else {
+                return ERROR_MALFORMED;
+            }
+
+            if (duration != 0) {
+                mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
+            }
+
             break;
         }
 
@@ -1722,6 +1857,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
         case FOURCC('h', 'd', 'l', 'r'):
         {
+            *offset += chunk_size;
+
             uint32_t buffer;
             if (mDataSource->readAt(
                         data_offset + 8, &buffer, 4) < 4) {
@@ -1736,7 +1873,26 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
             }
 
+            break;
+        }
+
+        case FOURCC('t', 'r', 'e', 'x'):
+        {
             *offset += chunk_size;
+
+            if (chunk_data_size < 24) {
+                return ERROR_IO;
+            }
+            uint32_t duration;
+            Trex trex;
+            if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
+                !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
+                !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
+                !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
+                !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
+                return ERROR_IO;
+            }
+            mTrex.add(trex);
             break;
         }
 
@@ -1753,6 +1909,7 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
                 return ERROR_MALFORMED;
             }
+
             uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
             if (buffer == NULL) {
                 return ERROR_MALFORMED;
@@ -1767,6 +1924,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 delete[] buffer;
                 buffer = NULL;
 
+                // advance read pointer so we don't end up reading this again
+                *offset += chunk_size;
                 return ERROR_IO;
             }
 
@@ -1781,6 +1940,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
         case FOURCC('c', 'o', 'v', 'r'):
         {
+            *offset += chunk_size;
+
             if (mFileMetaData != NULL) {
                 ALOGV("chunk_data_size = %lld and data_offset = %lld",
                         chunk_data_size, data_offset);
@@ -1803,7 +1964,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                     buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
             }
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1814,25 +1974,27 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
         case FOURCC('a', 'l', 'b', 'm'):
         case FOURCC('y', 'r', 'r', 'c'):
         {
+            *offset += chunk_size;
+
             status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
 
             if (err != OK) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('I', 'D', '3', '2'):
         {
+            *offset += chunk_size;
+
             if (chunk_data_size < 6) {
                 return ERROR_MALFORMED;
             }
 
             parseID3v2MetaData(data_offset + 6);
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1924,7 +2086,7 @@ status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
         offset += 16;
         size -= 16;
     }
-    ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
+    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
 
     if (size < 4) {
         return -EINVAL;
@@ -1956,9 +2118,10 @@ status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
             ALOGW("sub-sidx boxes not supported yet");
         }
         bool sap = d3 & 0x80000000;
-        bool saptype = d3 >> 28;
-        if (!sap || saptype > 2) {
-            ALOGW("not a stream access point, or unsupported type");
+        uint32_t saptype = (d3 >> 28) & 7;
+        if (!sap || (saptype != 1 && saptype != 2)) {
+            // type 1 and 2 are sync samples
+            ALOGW("not a stream access point, or unsupported type: %08x", d3);
         }
         total_duration += d2;
         offset += 12;
@@ -1969,12 +2132,11 @@ status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
         mSidxEntries.add(se);
     }
 
-    mSidxDuration = total_duration * 1000000 / timeScale;
-    ALOGV("duration: %lld", mSidxDuration);
+    uint64_t sidxDuration = total_duration * 1000000 / timeScale;
 
     int64_t metaDuration;
     if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
-        mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
+        mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
     }
     return OK;
 }
@@ -2261,11 +2423,14 @@ status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
 }
 
 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
-    if (size < 4) {
+    if (size < 4 || size == SIZE_MAX) {
         return ERROR_MALFORMED;
     }
 
-    uint8_t *buffer = new uint8_t[size];
+    uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
+    if (buffer == NULL) {
+        return ERROR_MALFORMED;
+    }
     if (mDataSource->readAt(
                 offset, buffer, size) != (ssize_t)size) {
         delete[] buffer;
@@ -2331,6 +2496,10 @@ status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int dept
         int len16 = 0; // Number of UTF-16 characters
 
         // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
+        if (size < 6) {
+            return ERROR_MALFORMED;
+        }
+
         if (size - 6 >= 4) {
             len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
             framedata = (char16_t *)(buffer + 6);
@@ -2354,6 +2523,7 @@ status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int dept
         }
 
         if (isUTF8) {
+            buffer[size] = 0;
             mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
         } else {
             // Convert from UTF-16 string to UTF-8 string.
@@ -2444,11 +2614,24 @@ sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
         return NULL;
     }
 
-    ALOGV("getTrack called, pssh: %d", mPssh.size());
 
-    return new MPEG4Source(
+    Trex *trex = NULL;
+    int32_t trackId;
+    if (track->meta->findInt32(kKeyTrackID, &trackId)) {
+        for (size_t i = 0; i < mTrex.size(); i++) {
+            Trex *t = &mTrex.editItemAt(index);
+            if (t->track_ID == (uint32_t) trackId) {
+                trex = t;
+                break;
+            }
+        }
+    }
+
+    ALOGV("getTrack called, pssh: %zu", mPssh.size());
+
+    return new MPEG4Source(this,
             track->meta, mDataSource, track->timescale, track->sampleTable,
-            mSidxEntries, mMoofOffset);
+            mSidxEntries, trex, mMoofOffset);
 }
 
 // static
@@ -2464,6 +2647,11 @@ status_t MPEG4Extractor::verifyTrack(Track *track) {
                 || type != kTypeAVCC) {
             return ERROR_MALFORMED;
         }
+    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
+        if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
+                    || type != kTypeHVCC) {
+            return ERROR_MALFORMED;
+        }
     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
             || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
         if (!track->meta->findData(kKeyESDS, &type, &data, &size)
@@ -2472,14 +2660,67 @@ status_t MPEG4Extractor::verifyTrack(Track *track) {
         }
     }
 
-    if (!track->sampleTable->isValid()) {
+    if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
         // Make sure we have all the metadata we need.
+        ALOGE("stbl atom missing/invalid.");
         return ERROR_MALFORMED;
     }
 
     return OK;
 }
 
+typedef enum {
+    //AOT_NONE             = -1,
+    //AOT_NULL_OBJECT      = 0,
+    //AOT_AAC_MAIN         = 1, /**< Main profile                              */
+    AOT_AAC_LC           = 2,   /**< Low Complexity object                     */
+    //AOT_AAC_SSR          = 3,
+    //AOT_AAC_LTP          = 4,
+    AOT_SBR              = 5,
+    //AOT_AAC_SCAL         = 6,
+    //AOT_TWIN_VQ          = 7,
+    //AOT_CELP             = 8,
+    //AOT_HVXC             = 9,
+    //AOT_RSVD_10          = 10, /**< (reserved)                                */
+    //AOT_RSVD_11          = 11, /**< (reserved)                                */
+    //AOT_TTSI             = 12, /**< TTSI Object                               */
+    //AOT_MAIN_SYNTH       = 13, /**< Main Synthetic object                     */
+    //AOT_WAV_TAB_SYNTH    = 14, /**< Wavetable Synthesis object                */
+    //AOT_GEN_MIDI         = 15, /**< General MIDI object                       */
+    //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
+    AOT_ER_AAC_LC        = 17,   /**< Error Resilient(ER) AAC Low Complexity    */
+    //AOT_RSVD_18          = 18, /**< (reserved)                                */
+    //AOT_ER_AAC_LTP       = 19, /**< Error Resilient(ER) AAC LTP object        */
+    AOT_ER_AAC_SCAL      = 20,   /**< Error Resilient(ER) AAC Scalable object   */
+    //AOT_ER_TWIN_VQ       = 21, /**< Error Resilient(ER) TwinVQ object         */
+    AOT_ER_BSAC          = 22,   /**< Error Resilient(ER) BSAC object           */
+    AOT_ER_AAC_LD        = 23,   /**< Error Resilient(ER) AAC LowDelay object   */
+    //AOT_ER_CELP          = 24, /**< Error Resilient(ER) CELP object           */
+    //AOT_ER_HVXC          = 25, /**< Error Resilient(ER) HVXC object           */
+    //AOT_ER_HILN          = 26, /**< Error Resilient(ER) HILN object           */
+    //AOT_ER_PARA          = 27, /**< Error Resilient(ER) Parametric object     */
+    //AOT_RSVD_28          = 28, /**< might become SSC                          */
+    AOT_PS               = 29,   /**< PS, Parametric Stereo (includes SBR)      */
+    //AOT_MPEGS            = 30, /**< MPEG Surround                             */
+
+    AOT_ESCAPE           = 31,   /**< Signal AOT uses more than 5 bits          */
+
+    //AOT_MP3ONMP4_L1      = 32, /**< MPEG-Layer1 in mp4                        */
+    //AOT_MP3ONMP4_L2      = 33, /**< MPEG-Layer2 in mp4                        */
+    //AOT_MP3ONMP4_L3      = 34, /**< MPEG-Layer3 in mp4                        */
+    //AOT_RSVD_35          = 35, /**< might become DST                          */
+    //AOT_RSVD_36          = 36, /**< might become ALS                          */
+    //AOT_AAC_SLS          = 37, /**< AAC + SLS                                 */
+    //AOT_SLS              = 38, /**< SLS                                       */
+    //AOT_ER_AAC_ELD       = 39, /**< AAC Enhanced Low Delay                    */
+
+    //AOT_USAC             = 42, /**< USAC                                      */
+    //AOT_SAOC             = 43, /**< SAOC                                      */
+    //AOT_LD_MPEGS         = 44, /**< Low Delay MPEG Surround                   */
+
+    //AOT_RSVD50           = 50,  /**< Interim AOT for Rsvd50                   */
+} AUDIO_OBJECT_TYPE;
+
 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
         const void *esds_data, size_t esds_size) {
     ESDS esds(esds_data, esds_size);
@@ -2562,7 +2803,7 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
         sampleRate = kSamplingRate[freqIndex];
     }
 
-    if (objectType == 5 || objectType == 29) { // SBR specific config per 14496-3 table 1.13
+    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
         uint32_t extFreqIndex = br.getBits(4);
         int32_t extSampleRate;
         if (extFreqIndex == 15) {
@@ -2580,6 +2821,131 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
         //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
     }
 
+    switch (numChannels) {
+        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
+        case 0:
+        case 1:// FC
+        case 2:// FL FR
+        case 3:// FC, FL FR
+        case 4:// FC, FL FR, RC
+        case 5:// FC, FL FR, SL SR
+        case 6:// FC, FL FR, SL SR, LFE
+            //numChannels already contains the right value
+            break;
+        case 11:// FC, FL FR, SL SR, RC, LFE
+            numChannels = 7;
+            break;
+        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
+        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
+        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
+            numChannels = 8;
+            break;
+        default:
+            return ERROR_UNSUPPORTED;
+    }
+
+    {
+        if (objectType == AOT_SBR || objectType == AOT_PS) {
+            objectType = br.getBits(5);
+
+            if (objectType == AOT_ESCAPE) {
+                objectType = 32 + br.getBits(6);
+            }
+        }
+        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
+                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
+                objectType == AOT_ER_BSAC) {
+            const int32_t frameLengthFlag = br.getBits(1);
+
+            const int32_t dependsOnCoreCoder = br.getBits(1);
+
+            if (dependsOnCoreCoder ) {
+                const int32_t coreCoderDelay = br.getBits(14);
+            }
+
+            int32_t extensionFlag = -1;
+            if (br.numBitsLeft() > 0) {
+                extensionFlag = br.getBits(1);
+            } else {
+                switch (objectType) {
+                // 14496-3 4.5.1.1 extensionFlag
+                case AOT_AAC_LC:
+                    extensionFlag = 0;
+                    break;
+                case AOT_ER_AAC_LC:
+                case AOT_ER_AAC_SCAL:
+                case AOT_ER_BSAC:
+                case AOT_ER_AAC_LD:
+                    extensionFlag = 1;
+                    break;
+                default:
+                    TRESPASS();
+                    break;
+                }
+                ALOGW("csd missing extension flag; assuming %d for object type %u.",
+                        extensionFlag, objectType);
+            }
+
+            if (numChannels == 0) {
+                int32_t channelsEffectiveNum = 0;
+                int32_t channelsNum = 0;
+                const int32_t ElementInstanceTag = br.getBits(4);
+                const int32_t Profile = br.getBits(2);
+                const int32_t SamplingFrequencyIndex = br.getBits(4);
+                const int32_t NumFrontChannelElements = br.getBits(4);
+                const int32_t NumSideChannelElements = br.getBits(4);
+                const int32_t NumBackChannelElements = br.getBits(4);
+                const int32_t NumLfeChannelElements = br.getBits(2);
+                const int32_t NumAssocDataElements = br.getBits(3);
+                const int32_t NumValidCcElements = br.getBits(4);
+
+                const int32_t MonoMixdownPresent = br.getBits(1);
+                if (MonoMixdownPresent != 0) {
+                    const int32_t MonoMixdownElementNumber = br.getBits(4);
+                }
+
+                const int32_t StereoMixdownPresent = br.getBits(1);
+                if (StereoMixdownPresent != 0) {
+                    const int32_t StereoMixdownElementNumber = br.getBits(4);
+                }
+
+                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
+                if (MatrixMixdownIndexPresent != 0) {
+                    const int32_t MatrixMixdownIndex = br.getBits(2);
+                    const int32_t PseudoSurroundEnable = br.getBits(1);
+                }
+
+                int i;
+                for (i=0; i < NumFrontChannelElements; i++) {
+                    const int32_t FrontElementIsCpe = br.getBits(1);
+                    const int32_t FrontElementTagSelect = br.getBits(4);
+                    channelsNum += FrontElementIsCpe ? 2 : 1;
+                }
+
+                for (i=0; i < NumSideChannelElements; i++) {
+                    const int32_t SideElementIsCpe = br.getBits(1);
+                    const int32_t SideElementTagSelect = br.getBits(4);
+                    channelsNum += SideElementIsCpe ? 2 : 1;
+                }
+
+                for (i=0; i < NumBackChannelElements; i++) {
+                    const int32_t BackElementIsCpe = br.getBits(1);
+                    const int32_t BackElementTagSelect = br.getBits(4);
+                    channelsNum += BackElementIsCpe ? 2 : 1;
+                }
+                channelsEffectiveNum = channelsNum;
+
+                for (i=0; i < NumLfeChannelElements; i++) {
+                    const int32_t LfeElementTagSelect = br.getBits(4);
+                    channelsNum += 1;
+                }
+                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
+                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
+                numChannels = channelsNum;
+            }
+        }
+    }
+
     if (numChannels == 0) {
         return ERROR_UNSUPPORTED;
     }
@@ -2610,19 +2976,23 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
 ////////////////////////////////////////////////////////////////////////////////
 
 MPEG4Source::MPEG4Source(
+        const sp<MPEG4Extractor> &owner,
         const sp<MetaData> &format,
         const sp<DataSource> &dataSource,
         int32_t timeScale,
         const sp<SampleTable> &sampleTable,
         Vector<SidxEntry> &sidx,
+        const Trex *trex,
         off64_t firstMoofOffset)
-    : mFormat(format),
+    : mOwner(owner),
+      mFormat(format),
       mDataSource(dataSource),
       mTimescale(timeScale),
       mSampleTable(sampleTable),
       mCurrentSampleIndex(0),
       mCurrentFragmentIndex(0),
       mSegments(sidx),
+      mTrex(trex),
       mFirstMoofOffset(firstMoofOffset),
       mCurrentMoofOffset(firstMoofOffset),
       mCurrentTime(0),
@@ -2631,6 +3001,7 @@ MPEG4Source::MPEG4Source(
       mCurrentSampleInfoOffsetsAllocSize(0),
       mCurrentSampleInfoOffsets(NULL),
       mIsAVC(false),
+      mIsHEVC(false),
       mNALLengthSize(0),
       mStarted(false),
       mGroup(NULL),
@@ -2638,6 +3009,8 @@ MPEG4Source::MPEG4Source(
       mWantsNALFragments(false),
       mSrcBuffer(NULL) {
 
+    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
+
     mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
     mDefaultIVSize = 0;
     mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
@@ -2655,6 +3028,7 @@ MPEG4Source::MPEG4Source(
     CHECK(success);
 
     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
+    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
 
     if (mIsAVC) {
         uint32_t type;
@@ -2669,6 +3043,18 @@ MPEG4Source::MPEG4Source(
 
         // The number of bytes used to encode the length of a NAL unit.
         mNALLengthSize = 1 + (ptr[4] & 3);
+    } else if (mIsHEVC) {
+        uint32_t type;
+        const void *data;
+        size_t size;
+        CHECK(format->findData(kKeyHVCC, &type, &data, &size));
+
+        const uint8_t *ptr = (const uint8_t *)data;
+
+        CHECK(size >= 7);
+        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
+
+        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
     }
 
     CHECK(format->findInt32(kKeyTrackID, &mTrackId));
@@ -2792,9 +3178,20 @@ status_t MPEG4Source::parseChunk(off64_t *offset) {
                 }
             }
             if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
-                // *offset points to the mdat box following this moof
-                parseChunk(offset); // doesn't actually parse it, just updates offset
-                mNextMoofOffset = *offset;
+                // *offset points to the box following this moof. Find the next moof from there.
+
+                while (true) {
+                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
+                        return ERROR_END_OF_STREAM;
+                    }
+                    chunk_size = ntohl(hdr[0]);
+                    chunk_type = ntohl(hdr[1]);
+                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
+                        mNextMoofOffset = *offset;
+                        break;
+                    }
+                    *offset += chunk_size;
+                }
             }
             break;
         }
@@ -2853,7 +3250,8 @@ status_t MPEG4Source::parseChunk(off64_t *offset) {
     return OK;
 }
 
-status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) {
+status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
+        off64_t offset, off64_t /* size */) {
     ALOGV("parseSampleAuxiliaryInformationSizes");
     // 14496-12 8.7.12
     uint8_t version;
@@ -2915,7 +3313,8 @@ status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64
     return OK;
 }
 
-status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) {
+status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
+        off64_t offset, off64_t /* size */) {
     ALOGV("parseSampleAuxiliaryInformationOffsets");
     // 14496-12 8.7.13
     uint8_t version;
@@ -3214,8 +3613,8 @@ status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
     } else if (mTrackFragmentHeaderInfo.mFlags
             & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
         sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
-    } else {
-        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
+    } else if (mTrex) {
+        sampleDuration = mTrex->default_sample_duration;
     }
 
     if (flags & kSampleSizePresent) {
@@ -3242,7 +3641,7 @@ status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
         sampleCtsOffset = 0;
     }
 
-    if (size < sampleCount * bytesPerSample) {
+    if (size < (off64_t)sampleCount * bytesPerSample) {
         return -EINVAL;
     }
 
@@ -3276,7 +3675,7 @@ status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
             offset += 4;
         }
 
-        ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, "
+        ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
               " flags 0x%08x", i + 1,
                 dataOffset, sampleSize, sampleDuration,
                 (flags & kFirstSampleFlagsPresent) && i == 0
@@ -3284,6 +3683,7 @@ status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
         tmp.offset = dataOffset;
         tmp.size = sampleSize;
         tmp.duration = sampleDuration;
+        tmp.compositionOffset = sampleCtsOffset;
         mCurrentSamples.add(tmp);
 
         dataOffset += sampleSize;
@@ -3355,7 +3755,7 @@ status_t MPEG4Source::read(
 
         uint32_t sampleIndex;
         status_t err = mSampleTable->findSampleAtTime(
-                seekTimeUs * mTimescale / 1000000,
+                seekTimeUs, 1000000, mTimescale,
                 &sampleIndex, findFlags);
 
         if (mode == ReadOptions::SEEK_CLOSEST) {
@@ -3417,7 +3817,7 @@ status_t MPEG4Source::read(
 
     off64_t offset;
     size_t size;
-    uint32_t cts;
+    uint32_t cts, stts;
     bool isSyncSample;
     bool newBuffer = false;
     if (mBuffer == NULL) {
@@ -3425,7 +3825,7 @@ status_t MPEG4Source::read(
 
         status_t err =
             mSampleTable->getMetaDataForSample(
-                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample);
+                    mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
 
         if (err != OK) {
             return err;
@@ -3443,7 +3843,7 @@ status_t MPEG4Source::read(
         }
     }
 
-    if (!mIsAVC || mWantsNALFragments) {
+    if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
         if (newBuffer) {
             ssize_t num_bytes_read =
                 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
@@ -3460,6 +3860,8 @@ status_t MPEG4Source::read(
             mBuffer->meta_data()->clear();
             mBuffer->meta_data()->setInt64(
                     kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
+            mBuffer->meta_data()->setInt64(
+                    kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
 
             if (targetSampleTimeUs >= 0) {
                 mBuffer->meta_data()->setInt64(
@@ -3473,7 +3875,7 @@ status_t MPEG4Source::read(
             ++mCurrentSampleIndex;
         }
 
-        if (!mIsAVC) {
+        if (!mIsAVC && !mIsHEVC) {
             *out = mBuffer;
             mBuffer = NULL;
 
@@ -3545,12 +3947,12 @@ status_t MPEG4Source::read(
             size_t dstOffset = 0;
 
             while (srcOffset < size) {
-                bool isMalFormed = !isInRange(0u, size, srcOffset, mNALLengthSize);
+                bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
                 size_t nalLength = 0;
                 if (!isMalFormed) {
                     nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
                     srcOffset += mNALLengthSize;
-                    isMalFormed = !isInRange(0u, size, srcOffset, nalLength);
+                    isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
                 }
 
                 if (isMalFormed) {
@@ -3582,6 +3984,8 @@ status_t MPEG4Source::read(
         mBuffer->meta_data()->clear();
         mBuffer->meta_data()->setInt64(
                 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
+        mBuffer->meta_data()->setInt64(
+                kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
 
         if (targetSampleTimeUs >= 0) {
             mBuffer->meta_data()->setInt64(
@@ -3624,7 +4028,7 @@ status_t MPEG4Source::fragmentedRead(
                 const SidxEntry *se = &mSegments[i];
                 if (totalTime + se->mDurationUs > seekTimeUs) {
                     // The requested time is somewhere in this segment
-                    if ((mode == ReadOptions::SEEK_NEXT_SYNC) ||
+                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
                         (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
                         (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
                         // requested next sync, or closest sync and it was closer to the end of
@@ -3637,11 +4041,19 @@ status_t MPEG4Source::fragmentedRead(
                 totalTime += se->mDurationUs;
                 totalOffset += se->mSize;
             }
-        mCurrentMoofOffset = totalOffset;
-        mCurrentSamples.clear();
-        mCurrentSampleIndex = 0;
-        parseChunk(&totalOffset);
-        mCurrentTime = totalTime * mTimescale / 1000000ll;
+            mCurrentMoofOffset = totalOffset;
+            mCurrentSamples.clear();
+            mCurrentSampleIndex = 0;
+            parseChunk(&totalOffset);
+            mCurrentTime = totalTime * mTimescale / 1000000ll;
+        } else {
+            // without sidx boxes, we can only seek to 0
+            mCurrentMoofOffset = mFirstMoofOffset;
+            mCurrentSamples.clear();
+            mCurrentSampleIndex = 0;
+            off64_t tmp = mCurrentMoofOffset;
+            parseChunk(&tmp);
+            mCurrentTime = 0;
         }
 
         if (mBuffer != NULL) {
@@ -3653,7 +4065,7 @@ status_t MPEG4Source::fragmentedRead(
     }
 
     off64_t offset = 0;
-    size_t size;
+    size_t size = 0;
     uint32_t cts = 0;
     bool isSyncSample = false;
     bool newBuffer = false;
@@ -3661,22 +4073,24 @@ status_t MPEG4Source::fragmentedRead(
         newBuffer = true;
 
         if (mCurrentSampleIndex >= mCurrentSamples.size()) {
-            // move to next fragment
-            Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1];
-            off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size;
+            // move to next fragment if there is one
+            if (mNextMoofOffset <= mCurrentMoofOffset) {
+                return ERROR_END_OF_STREAM;
+            }
+            off64_t nextMoof = mNextMoofOffset;
             mCurrentMoofOffset = nextMoof;
             mCurrentSamples.clear();
             mCurrentSampleIndex = 0;
             parseChunk(&nextMoof);
-                if (mCurrentSampleIndex >= mCurrentSamples.size()) {
-                    return ERROR_END_OF_STREAM;
-                }
+            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
+                return ERROR_END_OF_STREAM;
+            }
         }
 
         const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
         offset = smpl->offset;
         size = smpl->size;
-        cts = mCurrentTime;
+        cts = mCurrentTime + smpl->compositionOffset;
         mCurrentTime += smpl->duration;
         isSyncSample = (mCurrentSampleIndex == 0); // XXX
 
@@ -3708,7 +4122,7 @@ status_t MPEG4Source::fragmentedRead(
         bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
     }
 
-    if (!mIsAVC || mWantsNALFragments) {
+    if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
         if (newBuffer) {
             if (!isInRange((size_t)0u, mBuffer->size(), size)) {
                 mBuffer->release();
@@ -3733,6 +4147,8 @@ status_t MPEG4Source::fragmentedRead(
             mBuffer->set_range(0, size);
             mBuffer->meta_data()->setInt64(
                     kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
+            mBuffer->meta_data()->setInt64(
+                    kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
 
             if (targetSampleTimeUs >= 0) {
                 mBuffer->meta_data()->setInt64(
@@ -3746,7 +4162,7 @@ status_t MPEG4Source::fragmentedRead(
             ++mCurrentSampleIndex;
         }
 
-        if (!mIsAVC) {
+        if (!mIsAVC && !mIsHEVC) {
             *out = mBuffer;
             mBuffer = NULL;
 
@@ -3880,6 +4296,8 @@ status_t MPEG4Source::fragmentedRead(
 
         mBuffer->meta_data()->setInt64(
                 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
+        mBuffer->meta_data()->setInt64(
+                kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
 
         if (targetSampleTimeUs >= 0) {
             mBuffer->meta_data()->setInt64(
@@ -3942,6 +4360,8 @@ static bool isCompatibleBrand(uint32_t fourcc) {
         FOURCC('i', 's', 'o', 'm'),
         FOURCC('i', 's', 'o', '2'),
         FOURCC('a', 'v', 'c', '1'),
+        FOURCC('h', 'v', 'c', '1'),
+        FOURCC('h', 'e', 'v', '1'),
         FOURCC('3', 'g', 'p', '4'),
         FOURCC('m', 'p', '4', '1'),
         FOURCC('m', 'p', '4', '2'),
@@ -4013,7 +4433,7 @@ static bool BetterSniffMPEG4(
 
         char chunkstring[5];
         MakeFourCCString(chunkType, chunkstring);
-        ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset);
+        ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, offset);
         switch (chunkType) {
             case FOURCC('f', 't', 'y', 'p'):
             {