1 files changed, 250 insertions, 46 deletions
diff --git a/media/libstagefright/MPEG4Extractor.cpp b/media/libstagefright/MPEG4Extractor.cpp
index 362cd6b..2a3fa04 100644
--- a/media/libstagefright/MPEG4Extractor.cpp
+++ b/media/libstagefright/MPEG4Extractor.cpp
@@ -488,12 +488,12 @@ status_t MPEG4Extractor::readMetaData() {
             break;
         }
         uint32_t chunk_type = ntohl(hdr[1]);
-        if (chunk_type == FOURCC('s', 'i', 'd', 'x')) {
-            // parse the sidx box too
-            continue;
-        } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
+        if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
             // store the offset of the first segment
             mMoofOffset = offset;
+        } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) {
+            // keep parsing until we get to the data
+            continue;
         }
         break;
     }
@@ -913,6 +913,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
         case FOURCC('e', 'l', 's', 't'):
         {
+            *offset += chunk_size;
+
             // See 14496-12 8.6.6
             uint8_t version;
             if (mDataSource->readAt(data_offset, &version, 1) < 1) {
@@ -975,12 +977,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                     mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
                 }
             }
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('f', 'r', 'm', 'a'):
         {
+            *offset += chunk_size;
+
             uint32_t original_fourcc;
             if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
                 return ERROR_IO;
@@ -994,12 +997,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
                 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
             }
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('t', 'e', 'n', 'c'):
         {
+            *offset += chunk_size;
+
             if (chunk_size < 32) {
                 return ERROR_MALFORMED;
             }
@@ -1044,23 +1048,25 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
             mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
             mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('t', 'k', 'h', 'd'):
         {
+            *offset += chunk_size;
+
             status_t err;
             if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('p', 's', 's', 'h'):
         {
+            *offset += chunk_size;
+
             PsshInfo pssh;
 
             if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
@@ -1086,12 +1092,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             }
             mPssh.push_back(pssh);
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('m', 'd', 'h', 'd'):
         {
+            *offset += chunk_size;
+
             if (chunk_data_size < 4) {
                 return ERROR_MALFORMED;
             }
@@ -1172,7 +1179,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             mLastTrack->meta->setCString(
                     kKeyMediaLanguage, lang_code);
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1339,11 +1345,12 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->sampleTable->setChunkOffsetParams(
                         chunk_type, data_offset, chunk_data_size);
 
+            *offset += chunk_size;
+
             if (err != OK) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1353,11 +1360,12 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->sampleTable->setSampleToChunkParams(
                         data_offset, chunk_data_size);
 
+            *offset += chunk_size;
+
             if (err != OK) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1368,6 +1376,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->sampleTable->setSampleSizeParams(
                         chunk_type, data_offset, chunk_data_size);
 
+            *offset += chunk_size;
+
             if (err != OK) {
                 return err;
             }
@@ -1408,7 +1418,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 }
                 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
             }
-            *offset += chunk_size;
 
             // NOTE: setting another piece of metadata invalidates any pointers (such as the
             // mimetype) previously obtained, so don't cache them.
@@ -1432,6 +1441,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
         case FOURCC('s', 't', 't', 's'):
         {
+            *offset += chunk_size;
+
             status_t err =
                 mLastTrack->sampleTable->setTimeToSampleParams(
                         data_offset, chunk_data_size);
@@ -1440,12 +1451,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('c', 't', 't', 's'):
         {
+            *offset += chunk_size;
+
             status_t err =
                 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
                         data_offset, chunk_data_size);
@@ -1454,12 +1466,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('s', 't', 's', 's'):
         {
+            *offset += chunk_size;
+
             status_t err =
                 mLastTrack->sampleTable->setSyncSampleParams(
                         data_offset, chunk_data_size);
@@ -1468,13 +1481,14 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         // @xyz
         case FOURCC('\xA9', 'x', 'y', 'z'):
         {
+            *offset += chunk_size;
+
             // Best case the total data length inside "@xyz" box
             // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
             // where "\x00\x04" is the text string length with value = 4,
@@ -1503,12 +1517,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
             buffer[location_length] = '\0';
             mFileMetaData->setCString(kKeyLocation, buffer);
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('e', 's', 'd', 's'):
         {
+            *offset += chunk_size;
+
             if (chunk_data_size < 4) {
                 return ERROR_MALFORMED;
             }
@@ -1546,12 +1561,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 }
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('a', 'v', 'c', 'C'):
         {
+            *offset += chunk_size;
+
             sp<ABuffer> buffer = new ABuffer(chunk_data_size);
 
             if (mDataSource->readAt(
@@ -1562,12 +1578,12 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
             mLastTrack->meta->setData(
                     kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('d', '2', '6', '3'):
         {
+            *offset += chunk_size;
             /*
              * d263 contains a fixed 7 bytes part:
              *   vendor - 4 bytes
@@ -1593,7 +1609,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
             mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1601,11 +1616,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
         {
             uint8_t buffer[4];
             if (chunk_data_size < (off64_t)sizeof(buffer)) {
+                *offset += chunk_size;
                 return ERROR_MALFORMED;
             }
 
             if (mDataSource->readAt(
                         data_offset, buffer, 4) < 4) {
+                *offset += chunk_size;
                 return ERROR_IO;
             }
 
@@ -1639,6 +1656,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
         case FOURCC('n', 'a', 'm', 'e'):
         case FOURCC('d', 'a', 't', 'a'):
         {
+            *offset += chunk_size;
+
             if (mPath.size() == 6 && underMetaDataPath(mPath)) {
                 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
 
@@ -1647,12 +1666,13 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 }
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('m', 'v', 'h', 'd'):
         {
+            *offset += chunk_size;
+
             if (chunk_data_size < 24) {
                 return ERROR_MALFORMED;
             }
@@ -1680,7 +1700,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
             mFileMetaData->setCString(kKeyDate, s.string());
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1701,6 +1720,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
         case FOURCC('h', 'd', 'l', 'r'):
         {
+            *offset += chunk_size;
+
             uint32_t buffer;
             if (mDataSource->readAt(
                         data_offset + 8, &buffer, 4) < 4) {
@@ -1715,7 +1736,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
             }
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1740,6 +1760,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                 delete[] buffer;
                 buffer = NULL;
 
+                // advance read pointer so we don't end up reading this again
+                *offset += chunk_size;
                 return ERROR_IO;
             }
 
@@ -1754,6 +1776,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
 
         case FOURCC('c', 'o', 'v', 'r'):
         {
+            *offset += chunk_size;
+
             if (mFileMetaData != NULL) {
                 ALOGV("chunk_data_size = %lld and data_offset = %lld",
                         chunk_data_size, data_offset);
@@ -1768,7 +1792,6 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
                     buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
             }
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1779,25 +1802,27 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
         case FOURCC('a', 'l', 'b', 'm'):
         case FOURCC('y', 'r', 'r', 'c'):
         {
+            *offset += chunk_size;
+
             status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
 
             if (err != OK) {
                 return err;
             }
 
-            *offset += chunk_size;
             break;
         }
 
         case FOURCC('I', 'D', '3', '2'):
         {
+            *offset += chunk_size;
+
             if (chunk_data_size < 6) {
                 return ERROR_MALFORMED;
             }
 
             parseID3v2MetaData(data_offset + 6);
 
-            *offset += chunk_size;
             break;
         }
 
@@ -1921,9 +1946,10 @@ status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
             ALOGW("sub-sidx boxes not supported yet");
         }
         bool sap = d3 & 0x80000000;
-        bool saptype = d3 >> 28;
-        if (!sap || saptype > 2) {
-            ALOGW("not a stream access point, or unsupported type");
+        uint32_t saptype = (d3 >> 28) & 7;
+        if (!sap || (saptype != 1 && saptype != 2)) {
+            // type 1 and 2 are sync samples
+            ALOGW("not a stream access point, or unsupported type: %08x", d3);
         }
         total_duration += d2;
         offset += 12;
@@ -2442,6 +2468,58 @@ status_t MPEG4Extractor::verifyTrack(Track *track) {
     return OK;
 }
 
+typedef enum {
+    //AOT_NONE             = -1,
+    //AOT_NULL_OBJECT      = 0,
+    //AOT_AAC_MAIN         = 1, /**< Main profile                              */
+    AOT_AAC_LC           = 2,   /**< Low Complexity object                     */
+    //AOT_AAC_SSR          = 3,
+    //AOT_AAC_LTP          = 4,
+    AOT_SBR              = 5,
+    //AOT_AAC_SCAL         = 6,
+    //AOT_TWIN_VQ          = 7,
+    //AOT_CELP             = 8,
+    //AOT_HVXC             = 9,
+    //AOT_RSVD_10          = 10, /**< (reserved)                                */
+    //AOT_RSVD_11          = 11, /**< (reserved)                                */
+    //AOT_TTSI             = 12, /**< TTSI Object                               */
+    //AOT_MAIN_SYNTH       = 13, /**< Main Synthetic object                     */
+    //AOT_WAV_TAB_SYNTH    = 14, /**< Wavetable Synthesis object                */
+    //AOT_GEN_MIDI         = 15, /**< General MIDI object                       */
+    //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
+    AOT_ER_AAC_LC        = 17,   /**< Error Resilient(ER) AAC Low Complexity    */
+    //AOT_RSVD_18          = 18, /**< (reserved)                                */
+    //AOT_ER_AAC_LTP       = 19, /**< Error Resilient(ER) AAC LTP object        */
+    AOT_ER_AAC_SCAL      = 20,   /**< Error Resilient(ER) AAC Scalable object   */
+    //AOT_ER_TWIN_VQ       = 21, /**< Error Resilient(ER) TwinVQ object         */
+    AOT_ER_BSAC          = 22,   /**< Error Resilient(ER) BSAC object           */
+    AOT_ER_AAC_LD        = 23,   /**< Error Resilient(ER) AAC LowDelay object   */
+    //AOT_ER_CELP          = 24, /**< Error Resilient(ER) CELP object           */
+    //AOT_ER_HVXC          = 25, /**< Error Resilient(ER) HVXC object           */
+    //AOT_ER_HILN          = 26, /**< Error Resilient(ER) HILN object           */
+    //AOT_ER_PARA          = 27, /**< Error Resilient(ER) Parametric object     */
+    //AOT_RSVD_28          = 28, /**< might become SSC                          */
+    AOT_PS               = 29,   /**< PS, Parametric Stereo (includes SBR)      */
+    //AOT_MPEGS            = 30, /**< MPEG Surround                             */
+
+    AOT_ESCAPE           = 31,   /**< Signal AOT uses more than 5 bits          */
+
+    //AOT_MP3ONMP4_L1      = 32, /**< MPEG-Layer1 in mp4                        */
+    //AOT_MP3ONMP4_L2      = 33, /**< MPEG-Layer2 in mp4                        */
+    //AOT_MP3ONMP4_L3      = 34, /**< MPEG-Layer3 in mp4                        */
+    //AOT_RSVD_35          = 35, /**< might become DST                          */
+    //AOT_RSVD_36          = 36, /**< might become ALS                          */
+    //AOT_AAC_SLS          = 37, /**< AAC + SLS                                 */
+    //AOT_SLS              = 38, /**< SLS                                       */
+    //AOT_ER_AAC_ELD       = 39, /**< AAC Enhanced Low Delay                    */
+
+    //AOT_USAC             = 42, /**< USAC                                      */
+    //AOT_SAOC             = 43, /**< SAOC                                      */
+    //AOT_LD_MPEGS         = 44, /**< Low Delay MPEG Surround                   */
+
+    //AOT_RSVD50           = 50,  /**< Interim AOT for Rsvd50                   */
+} AUDIO_OBJECT_TYPE;
+
 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
         const void *esds_data, size_t esds_size) {
     ESDS esds(esds_data, esds_size);
@@ -2524,7 +2602,7 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
         sampleRate = kSamplingRate[freqIndex];
     }
 
-    if (objectType == 5 || objectType == 29) { // SBR specific config per 14496-3 table 1.13
+    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
         uint32_t extFreqIndex = br.getBits(4);
         int32_t extSampleRate;
         if (extFreqIndex == 15) {
@@ -2542,6 +2620,111 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
         //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
     }
 
+    switch (numChannels) {
+        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
+        case 0:
+        case 1:// FC
+        case 2:// FL FR
+        case 3:// FC, FL FR
+        case 4:// FC, FL FR, RC
+        case 5:// FC, FL FR, SL SR
+        case 6:// FC, FL FR, SL SR, LFE
+            //numChannels already contains the right value
+            break;
+        case 11:// FC, FL FR, SL SR, RC, LFE
+            numChannels = 7;
+            break;
+        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
+        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
+        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
+            numChannels = 8;
+            break;
+        default:
+            return ERROR_UNSUPPORTED;
+    }
+
+    {
+        if (objectType == AOT_SBR || objectType == AOT_PS) {
+            const int32_t extensionSamplingFrequency = br.getBits(4);
+            objectType = br.getBits(5);
+
+            if (objectType == AOT_ESCAPE) {
+                objectType = 32 + br.getBits(6);
+            }
+        }
+        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
+                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
+                objectType == AOT_ER_BSAC) {
+            const int32_t frameLengthFlag = br.getBits(1);
+
+            const int32_t dependsOnCoreCoder = br.getBits(1);
+
+            if (dependsOnCoreCoder ) {
+                const int32_t coreCoderDelay = br.getBits(14);
+            }
+
+            const int32_t extensionFlag = br.getBits(1);
+
+            if (numChannels == 0 ) {
+                int32_t channelsEffectiveNum = 0;
+                int32_t channelsNum = 0;
+                const int32_t ElementInstanceTag = br.getBits(4);
+                const int32_t Profile = br.getBits(2);
+                const int32_t SamplingFrequencyIndex = br.getBits(4);
+                const int32_t NumFrontChannelElements = br.getBits(4);
+                const int32_t NumSideChannelElements = br.getBits(4);
+                const int32_t NumBackChannelElements = br.getBits(4);
+                const int32_t NumLfeChannelElements = br.getBits(2);
+                const int32_t NumAssocDataElements = br.getBits(3);
+                const int32_t NumValidCcElements = br.getBits(4);
+
+                const int32_t MonoMixdownPresent = br.getBits(1);
+                if (MonoMixdownPresent != 0) {
+                    const int32_t MonoMixdownElementNumber = br.getBits(4);
+                }
+
+                const int32_t StereoMixdownPresent = br.getBits(1);
+                if (StereoMixdownPresent != 0) {
+                    const int32_t StereoMixdownElementNumber = br.getBits(4);
+                }
+
+                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
+                if (MatrixMixdownIndexPresent != 0) {
+                    const int32_t MatrixMixdownIndex = br.getBits(2);
+                    const int32_t PseudoSurroundEnable = br.getBits(1);
+                }
+
+                int i;
+                for (i=0; i < NumFrontChannelElements; i++) {
+                    const int32_t FrontElementIsCpe = br.getBits(1);
+                    const int32_t FrontElementTagSelect = br.getBits(4);
+                    channelsNum += FrontElementIsCpe ? 2 : 1;
+                }
+
+                for (i=0; i < NumSideChannelElements; i++) {
+                    const int32_t SideElementIsCpe = br.getBits(1);
+                    const int32_t SideElementTagSelect = br.getBits(4);
+                    channelsNum += SideElementIsCpe ? 2 : 1;
+                }
+
+                for (i=0; i < NumBackChannelElements; i++) {
+                    const int32_t BackElementIsCpe = br.getBits(1);
+                    const int32_t BackElementTagSelect = br.getBits(4);
+                    channelsNum += BackElementIsCpe ? 2 : 1;
+                }
+                channelsEffectiveNum = channelsNum;
+
+                for (i=0; i < NumLfeChannelElements; i++) {
+                    const int32_t LfeElementTagSelect = br.getBits(4);
+                    channelsNum += 1;
+                }
+                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
+                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
+                numChannels = channelsNum;
+            }
+        }
+    }
+
     if (numChannels == 0) {
         return ERROR_UNSUPPORTED;
     }
@@ -2742,9 +2925,20 @@ status_t MPEG4Source::parseChunk(off64_t *offset) {
                 }
             }
             if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
-                // *offset points to the mdat box following this moof
-                parseChunk(offset); // doesn't actually parse it, just updates offset
-                mNextMoofOffset = *offset;
+                // *offset points to the box following this moof. Find the next moof from there.
+
+                while (true) {
+                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
+                        return ERROR_END_OF_STREAM;
+                    }
+                    chunk_size = ntohl(hdr[0]);
+                    chunk_type = ntohl(hdr[1]);
+                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
+                        mNextMoofOffset = *offset;
+                        break;
+                    }
+                    *offset += chunk_size;
+                }
             }
             break;
         }
@@ -3549,7 +3743,7 @@ status_t MPEG4Source::fragmentedRead(
                 const SidxEntry *se = &mSegments[i];
                 if (totalTime + se->mDurationUs > seekTimeUs) {
                     // The requested time is somewhere in this segment
-                    if ((mode == ReadOptions::SEEK_NEXT_SYNC) ||
+                    if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
                         (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
                         (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
                         // requested next sync, or closest sync and it was closer to the end of
@@ -3562,11 +3756,19 @@ status_t MPEG4Source::fragmentedRead(
                 totalTime += se->mDurationUs;
                 totalOffset += se->mSize;
             }
-        mCurrentMoofOffset = totalOffset;
-        mCurrentSamples.clear();
-        mCurrentSampleIndex = 0;
-        parseChunk(&totalOffset);
-        mCurrentTime = totalTime * mTimescale / 1000000ll;
+            mCurrentMoofOffset = totalOffset;
+            mCurrentSamples.clear();
+            mCurrentSampleIndex = 0;
+            parseChunk(&totalOffset);
+            mCurrentTime = totalTime * mTimescale / 1000000ll;
+        } else {
+            // without sidx boxes, we can only seek to 0
+            mCurrentMoofOffset = mFirstMoofOffset;
+            mCurrentSamples.clear();
+            mCurrentSampleIndex = 0;
+            off64_t tmp = mCurrentMoofOffset;
+            parseChunk(&tmp);
+            mCurrentTime = 0;
         }
 
         if (mBuffer != NULL) {
@@ -3578,7 +3780,7 @@ status_t MPEG4Source::fragmentedRead(
     }
 
     off64_t offset = 0;
-    size_t size;
+    size_t size = 0;
     uint32_t cts = 0;
     bool isSyncSample = false;
     bool newBuffer = false;
@@ -3586,16 +3788,18 @@ status_t MPEG4Source::fragmentedRead(
         newBuffer = true;
 
         if (mCurrentSampleIndex >= mCurrentSamples.size()) {
-            // move to next fragment
-            Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1];
-            off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size;
+            // move to next fragment if there is one
+            if (mNextMoofOffset <= mCurrentMoofOffset) {
+                return ERROR_END_OF_STREAM;
+            }
+            off64_t nextMoof = mNextMoofOffset;
             mCurrentMoofOffset = nextMoof;
             mCurrentSamples.clear();
             mCurrentSampleIndex = 0;
             parseChunk(&nextMoof);
-                if (mCurrentSampleIndex >= mCurrentSamples.size()) {
-                    return ERROR_END_OF_STREAM;
-                }
+            if (mCurrentSampleIndex >= mCurrentSamples.size()) {
+                return ERROR_END_OF_STREAM;
+            }
         }
 
         const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];