6 files changed, 197 insertions, 164 deletions
diff --git a/services/audioflinger/AudioFlinger.cpp b/services/audioflinger/AudioFlinger.cpp
index 26dac95..c0c34f7 100644
--- a/services/audioflinger/AudioFlinger.cpp
+++ b/services/audioflinger/AudioFlinger.cpp
@@ -525,7 +525,7 @@ sp<IAudioTrack> AudioFlinger::createTrack(
         }
 
         // Look for sync events awaiting for a session to be used.
-        for (int i = 0; i < (int)mPendingSyncEvents.size(); i++) {
+        for (size_t i = 0; i < mPendingSyncEvents.size(); i++) {
             if (mPendingSyncEvents[i]->triggerSession() == lSessionId) {
                 if (thread->isValidSyncEvent(mPendingSyncEvents[i])) {
                     if (lStatus == NO_ERROR) {
@@ -831,7 +831,7 @@ status_t AudioFlinger::setStreamMute(audio_stream_type_t stream, bool muted)
 
     AutoMutex lock(mLock);
     mStreamTypes[stream].mute = muted;
-    for (uint32_t i = 0; i < mPlaybackThreads.size(); i++)
+    for (size_t i = 0; i < mPlaybackThreads.size(); i++)
         mPlaybackThreads.valueAt(i)->setStreamMute(stream, muted);
 
     return NO_ERROR;
diff --git a/services/audioflinger/AudioPolicyService.cpp b/services/audioflinger/AudioPolicyService.cpp
index a37272d..6ea5324 100644
--- a/services/audioflinger/AudioPolicyService.cpp
+++ b/services/audioflinger/AudioPolicyService.cpp
@@ -678,6 +678,10 @@ AudioPolicyService::AudioCommandThread::~AudioCommandThread()
     if (!mAudioCommands.isEmpty()) {
         release_wake_lock(mName.string());
     }
+    for (size_t k=0; k < mAudioCommands.size(); k++) {
+        delete mAudioCommands[k]->mParam;
+        delete mAudioCommands[k];
+    }
     mAudioCommands.clear();
     delete mpToneGenerator;
 }
@@ -849,7 +853,7 @@ void AudioPolicyService::AudioCommandThread::startToneCommand(ToneGenerator::ton
     ToneData *data = new ToneData();
     data->mType = type;
     data->mStream = stream;
-    command->mParam = (void *)data;
+    command->mParam = data;
     Mutex::Autolock _l(mLock);
     insertCommand_l(command);
     ALOGV("AudioCommandThread() adding tone start type %d, stream %d", type, stream);
@@ -950,7 +954,7 @@ void AudioPolicyService::AudioCommandThread::stopOutputCommand(audio_io_handle_t
     data->mIO = output;
     data->mStream = stream;
     data->mSession = session;
-    command->mParam = (void *)data;
+    command->mParam = data;
     Mutex::Autolock _l(mLock);
     insertCommand_l(command);
     ALOGV("AudioCommandThread() adding stop output %d", output);
@@ -963,7 +967,7 @@ void AudioPolicyService::AudioCommandThread::releaseOutputCommand(audio_io_handl
     command->mCommand = RELEASE_OUTPUT;
     ReleaseOutputData *data = new ReleaseOutputData();
     data->mIO = output;
-    command->mParam = (void *)data;
+    command->mParam = data;
     Mutex::Autolock _l(mLock);
     insertCommand_l(command);
     ALOGV("AudioCommandThread() adding release output %d", output);
@@ -1052,6 +1056,10 @@ void AudioPolicyService::AudioCommandThread::insertCommand_l(AudioCommand *comma
         for (size_t k = i + 1; k < mAudioCommands.size(); k++) {
             if (mAudioCommands[k] == removedCommands[j]) {
                 ALOGV("suppressing command: %d", mAudioCommands[k]->mCommand);
+                // for commands that are not filtered,
+                // command->mParam is deleted in threadLoop
+                delete mAudioCommands[k]->mParam;
+                delete mAudioCommands[k];
                 mAudioCommands.removeAt(k);
                 break;
             }
diff --git a/services/audioflinger/AudioPolicyService.h b/services/audioflinger/AudioPolicyService.h
index ae053a9..a38160f 100644
--- a/services/audioflinger/AudioPolicyService.h
+++ b/services/audioflinger/AudioPolicyService.h
@@ -197,6 +197,8 @@ private:
                     void        insertCommand_l(AudioCommand *command, int delayMs = 0);
 
     private:
+        class AudioCommandData;
+
         // descriptor for requested tone playback event
         class AudioCommand {
 
@@ -211,41 +213,48 @@ private:
             Condition mCond; // condition for status return
             status_t mStatus; // command status
             bool mWaitStatus; // true if caller is waiting for status
-            void *mParam;     // command parameter (ToneData, VolumeData, ParametersData)
+            AudioCommandData *mParam;     // command specific parameter data
+        };
+
+        class AudioCommandData {
+        public:
+            virtual ~AudioCommandData() {}
+        protected:
+            AudioCommandData() {}
         };
 
-        class ToneData {
+        class ToneData : public AudioCommandData {
         public:
             ToneGenerator::tone_type mType; // tone type (START_TONE only)
             audio_stream_type_t mStream;    // stream type (START_TONE only)
         };
 
-        class VolumeData {
+        class VolumeData : public AudioCommandData {
         public:
             audio_stream_type_t mStream;
             float mVolume;
             audio_io_handle_t mIO;
         };
 
-        class ParametersData {
+        class ParametersData : public AudioCommandData {
         public:
             audio_io_handle_t mIO;
             String8 mKeyValuePairs;
         };
 
-        class VoiceVolumeData {
+        class VoiceVolumeData : public AudioCommandData {
         public:
             float mVolume;
         };
 
-        class StopOutputData {
+        class StopOutputData : public AudioCommandData {
         public:
             audio_io_handle_t mIO;
             audio_stream_type_t mStream;
             int mSession;
         };
 
-        class ReleaseOutputData {
+        class ReleaseOutputData : public AudioCommandData {
         public:
             audio_io_handle_t mIO;
         };
diff --git a/services/audioflinger/AudioResamplerSinc.cpp b/services/audioflinger/AudioResamplerSinc.cpp
index 207f26b..e50b192 100644
--- a/services/audioflinger/AudioResamplerSinc.cpp
+++ b/services/audioflinger/AudioResamplerSinc.cpp
@@ -17,6 +17,7 @@
 #define LOG_TAG "AudioResamplerSinc"
 //#define LOG_NDEBUG 0
 
+#define __STDC_CONSTANT_MACROS
 #include <malloc.h>
 #include <string.h>
 #include <stdlib.h>
@@ -37,12 +38,14 @@
 #define USE_INLINE_ASSEMBLY (false)
 #endif
 
-#if USE_INLINE_ASSEMBLY && defined(__ARM_NEON__)
-#define USE_NEON (true)
+#if defined(__aarch64__) || defined(__ARM_NEON__)
+#include <arm_neon.h>
+#define USE_NEON
 #else
-#define USE_NEON (false)
+#undef USE_NEON
 #endif
 
+#define UNUSED(x) ((void)(x))
 
 namespace android {
 // ----------------------------------------------------------------------------
@@ -634,8 +637,8 @@ void AudioResamplerSinc::read(
 }
 
 template<int CHANNELS>
-void AudioResamplerSinc::filterCoefficient(
-        int32_t* out, uint32_t phase, const int16_t *samples, uint32_t vRL)
+void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase,
+         const int16_t *samples, uint32_t vRL)
 {
     // NOTE: be very careful when modifying the code here. register
     // pressure is very high and a small change might cause the compiler
@@ -662,160 +665,171 @@ void AudioResamplerSinc::filterCoefficient(
 
     size_t count = offset;
 
-    if (!USE_NEON) {
-        int32_t l = 0;
-        int32_t r = 0;
-        for (size_t i=0 ; i<count ; i++) {
-            interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
-            sP -= CHANNELS;
-            interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
-            sN += CHANNELS;
-        }
-        out[0] += 2 * mulRL(1, l, vRL);
-        out[1] += 2 * mulRL(0, r, vRL);
-    } else if (CHANNELS == 1) {
+#ifndef USE_NEON
+    int32_t l = 0;
+    int32_t r = 0;
+    for (size_t i=0 ; i<count ; i++) {
+        interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
+        sP -= CHANNELS;
+        interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
+        sN += CHANNELS;
+    }
+    out[0] += 2 * mulRL(1, l, vRL);
+    out[1] += 2 * mulRL(0, r, vRL);
+#else
+    UNUSED(vRL);
+    if (CHANNELS == 1) {
         int32_t const* coefsP1 = coefsP + offset;
         int32_t const* coefsN1 = coefsN + offset;
         sP -= CHANNELS*3;
-        asm (
-            "vmov.32        d2[0], %[lerpP]          \n"    // load the positive phase
-            "vmov.32        d2[1], %[lerpN]          \n"    // load the negative phase
-            "veor           q0, q0, q0               \n"    // result, initialize to 0
-            "vshl.s32       d2, d2, #16              \n"    // convert to 32 bits
-
-            "1:                                      \n"
-            "vld1.16        { d4}, [%[sP]]           \n"    // load 4 16-bits stereo samples
-            "vld1.32        { q8}, [%[coefsP0]:128]! \n"    // load 4 32-bits coefs
-            "vld1.32        { q9}, [%[coefsP1]:128]! \n"    // load 4 32-bits coefs for interpolation
-            "vld1.16        { d6}, [%[sN]]!          \n"    // load 4 16-bits stereo samples
-            "vld1.32        {q10}, [%[coefsN0]:128]! \n"    // load 4 32-bits coefs
-            "vld1.32        {q11}, [%[coefsN1]:128]! \n"    // load 4 32-bits coefs for interpolation
-
-            "vrev64.16      d4, d4                   \n"    // reverse 2 frames of the positive side
-
-            "vsub.s32        q9,  q9,  q8            \n"    // interpolate (step1) 1st set of coefs
-            "vsub.s32       q11, q11, q10            \n"    // interpolate (step1) 2nd set of coets
-            "vshll.s16      q12,  d4, #15            \n"    // extend samples to 31 bits
-
-            "vqrdmulh.s32    q9,  q9, d2[0]          \n"    // interpolate (step2) 1st set of coefs
-            "vqrdmulh.s32   q11, q11, d2[1]          \n"    // interpolate (step3) 2nd set of coefs
-            "vshll.s16      q14,  d6, #15            \n"    // extend samples to 31 bits
-
-            "vadd.s32        q8,  q8,  q9            \n"    // interpolate (step3) 1st set
-            "vadd.s32       q10, q10, q11            \n"    // interpolate (step4) 2nd set
-            "subs           %[count], %[count], #4   \n"    // update loop counter
-
-            "vqrdmulh.s32   q12, q12, q8             \n"    // multiply samples by interpolated coef
-            "vqrdmulh.s32   q14, q14, q10            \n"    // multiply samples by interpolated coef
-            "sub            %[sP], %[sP], #8         \n"    // move pointer to next set of samples
-
-            "vadd.s32       q0, q0, q12              \n"    // accumulate result
-            "vadd.s32       q0, q0, q14              \n"    // accumulate result
-
-            "bne            1b                       \n"    // loop
-
-            "vld1.s32       {d2}, [%[vLR]]           \n"    // load volumes
-            "vld1.s32       {d3}, %[out]             \n"    // load the output
-            "vpadd.s32      d0, d0, d1               \n"    // add all 4 partial sums
-            "vpadd.s32      d0, d0, d0               \n"    // together
-            "vdup.i32       d0, d0[0]                \n"    // interleave L,R channels
-            "vqrdmulh.s32   d0, d0, d2               \n"    // apply volume
-            "vadd.s32       d3, d3, d0               \n"    // accumulate result
-            "vst1.s32       {d3}, %[out]             \n"    // store result
-
-            : [out]     "=Uv" (out[0]),
-              [count]   "+r" (count),
-              [coefsP0] "+r" (coefsP),
-              [coefsP1] "+r" (coefsP1),
-              [coefsN0] "+r" (coefsN),
-              [coefsN1] "+r" (coefsN1),
-              [sP]      "+r" (sP),
-              [sN]      "+r" (sN)
-            : [lerpP]   "r" (lerpP),
-              [lerpN]   "r" (lerpN),
-              [vLR]     "r" (mVolumeSIMD)
-            : "cc", "memory",
-              "q0", "q1", "q2", "q3",
-              "q8", "q9", "q10", "q11",
-              "q12", "q14"
-        );
+
+        int32x4_t sum;
+        int32x2_t lerpPN;
+        lerpPN = vdup_n_s32(0);
+        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
+        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
+        lerpPN = vshl_n_s32(lerpPN, 16);
+        sum = vdupq_n_s32(0);
+
+        int16x4_t sampleP, sampleN;
+        int32x4_t samplePExt, sampleNExt;
+        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;
+
+        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
+        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
+        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
+        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
+        for (; count > 0; count -= 4) {
+            sampleP = vld1_s16(sP);
+            sampleN = vld1_s16(sN);
+            coefsPV0 = vld1q_s32(coefsP);
+            coefsNV0 = vld1q_s32(coefsN);
+            coefsPV1 = vld1q_s32(coefsP1);
+            coefsNV1 = vld1q_s32(coefsN1);
+            sP -= 4;
+            sN += 4;
+            coefsP += 4;
+            coefsN += 4;
+            coefsP1 += 4;
+            coefsN1 += 4;
+
+            sampleP = vrev64_s16(sampleP);
+
+            // interpolate (step1)
+            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
+            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
+            samplePExt = vshll_n_s16(sampleP, 15);
+            // interpolate (step2)
+            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
+            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
+            sampleNExt = vshll_n_s16(sampleN, 15);
+            // interpolate (step3)
+            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
+            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);
+
+            samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0);
+            sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0);
+            sum = vaddq_s32(sum, samplePExt);
+            sum = vaddq_s32(sum, sampleNExt);
+        }
+        int32x2_t volumesV, outV;
+        volumesV = vld1_s32(mVolumeSIMD);
+        outV = vld1_s32(out);
+
+        //add all 4 partial sums
+        int32x2_t sumLow, sumHigh;
+        sumLow = vget_low_s32(sum);
+        sumHigh = vget_high_s32(sum);
+        sumLow = vpadd_s32(sumLow, sumHigh);
+        sumLow = vpadd_s32(sumLow, sumLow);
+
+        sumLow = vqrdmulh_s32(sumLow, volumesV);
+        outV = vadd_s32(outV, sumLow);
+        vst1_s32(out, outV);
     } else if (CHANNELS == 2) {
         int32_t const* coefsP1 = coefsP + offset;
         int32_t const* coefsN1 = coefsN + offset;
         sP -= CHANNELS*3;
-        asm (
-            "vmov.32        d2[0], %[lerpP]          \n"    // load the positive phase
-            "vmov.32        d2[1], %[lerpN]          \n"    // load the negative phase
-            "veor           q0, q0, q0               \n"    // result, initialize to 0
-            "veor           q4, q4, q4               \n"    // result, initialize to 0
-            "vshl.s32       d2, d2, #16              \n"    // convert to 32 bits
-
-            "1:                                      \n"
-            "vld2.16        {d4,d5}, [%[sP]]         \n"    // load 4 16-bits stereo samples
-            "vld1.32        { q8}, [%[coefsP0]:128]! \n"    // load 4 32-bits coefs
-            "vld1.32        { q9}, [%[coefsP1]:128]! \n"    // load 4 32-bits coefs for interpolation
-            "vld2.16        {d6,d7}, [%[sN]]!        \n"    // load 4 16-bits stereo samples
-            "vld1.32        {q10}, [%[coefsN0]:128]! \n"    // load 4 32-bits coefs
-            "vld1.32        {q11}, [%[coefsN1]:128]! \n"    // load 4 32-bits coefs for interpolation
-
-            "vrev64.16      d4, d4                   \n"    // reverse 2 frames of the positive side
-            "vrev64.16      d5, d5                   \n"    // reverse 2 frames of the positive side
-
-            "vsub.s32        q9,  q9,  q8            \n"    // interpolate (step1) 1st set of coefs
-            "vsub.s32       q11, q11, q10            \n"    // interpolate (step1) 2nd set of coets
-            "vshll.s16      q12,  d4, #15            \n"    // extend samples to 31 bits
-            "vshll.s16      q13,  d5, #15            \n"    // extend samples to 31 bits
-
-            "vqrdmulh.s32    q9,  q9, d2[0]          \n"    // interpolate (step2) 1st set of coefs
-            "vqrdmulh.s32   q11, q11, d2[1]          \n"    // interpolate (step3) 2nd set of coefs
-            "vshll.s16      q14,  d6, #15            \n"    // extend samples to 31 bits
-            "vshll.s16      q15,  d7, #15            \n"    // extend samples to 31 bits
-
-            "vadd.s32        q8,  q8,  q9            \n"    // interpolate (step3) 1st set
-            "vadd.s32       q10, q10, q11            \n"    // interpolate (step4) 2nd set
-            "subs           %[count], %[count], #4   \n"    // update loop counter
-
-            "vqrdmulh.s32   q12, q12, q8             \n"    // multiply samples by interpolated coef
-            "vqrdmulh.s32   q13, q13, q8             \n"    // multiply samples by interpolated coef
-            "vqrdmulh.s32   q14, q14, q10            \n"    // multiply samples by interpolated coef
-            "vqrdmulh.s32   q15, q15, q10            \n"    // multiply samples by interpolated coef
-            "sub            %[sP], %[sP], #16        \n"    // move pointer to next set of samples
-
-            "vadd.s32       q0, q0, q12              \n"    // accumulate result
-            "vadd.s32       q4, q4, q13              \n"    // accumulate result
-            "vadd.s32       q0, q0, q14              \n"    // accumulate result
-            "vadd.s32       q4, q4, q15              \n"    // accumulate result
-
-            "bne            1b                       \n"    // loop
-
-            "vld1.s32       {d2}, [%[vLR]]           \n"    // load volumes
-            "vld1.s32       {d3}, %[out]             \n"    // load the output
-            "vpadd.s32      d0, d0, d1               \n"    // add all 4 partial sums from q0
-            "vpadd.s32      d8, d8, d9               \n"    // add all 4 partial sums from q4
-            "vpadd.s32      d0, d0, d0               \n"    // together
-            "vpadd.s32      d8, d8, d8               \n"    // together
-            "vtrn.s32       d0, d8                   \n"    // interlace L,R channels
-            "vqrdmulh.s32   d0, d0, d2               \n"    // apply volume
-            "vadd.s32       d3, d3, d0               \n"    // accumulate result
-            "vst1.s32       {d3}, %[out]             \n"    // store result
-
-            : [out]     "=Uv" (out[0]),
-              [count]   "+r" (count),
-              [coefsP0] "+r" (coefsP),
-              [coefsP1] "+r" (coefsP1),
-              [coefsN0] "+r" (coefsN),
-              [coefsN1] "+r" (coefsN1),
-              [sP]      "+r" (sP),
-              [sN]      "+r" (sN)
-            : [lerpP]   "r" (lerpP),
-              [lerpN]   "r" (lerpN),
-              [vLR]     "r" (mVolumeSIMD)
-            : "cc", "memory",
-              "q0", "q1", "q2", "q3", "q4",
-              "q8", "q9", "q10", "q11",
-              "q12", "q13", "q14", "q15"
-        );
+
+        int32x4_t sum0, sum1;
+        int32x2_t lerpPN;
+
+        lerpPN = vdup_n_s32(0);
+        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
+        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
+        lerpPN = vshl_n_s32(lerpPN, 16);
+        sum0 = vdupq_n_s32(0);
+        sum1 = vdupq_n_s32(0);
+
+        int16x4x2_t sampleP, sampleN;
+        int32x4x2_t samplePExt, sampleNExt;
+        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;
+
+        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
+        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
+        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
+        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
+        for (; count > 0; count -= 4) {
+            sampleP = vld2_s16(sP);
+            sampleN = vld2_s16(sN);
+            coefsPV0 = vld1q_s32(coefsP);
+            coefsNV0 = vld1q_s32(coefsN);
+            coefsPV1 = vld1q_s32(coefsP1);
+            coefsNV1 = vld1q_s32(coefsN1);
+            sP -= 8;
+            sN += 8;
+            coefsP += 4;
+            coefsN += 4;
+            coefsP1 += 4;
+            coefsN1 += 4;
+
+            sampleP.val[0] = vrev64_s16(sampleP.val[0]);
+            sampleP.val[1] = vrev64_s16(sampleP.val[1]);
+
+            // interpolate (step1)
+            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
+            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
+            samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15);
+            samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15);
+            // interpolate (step2)
+            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
+            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
+            sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15);
+            sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15);
+            // interpolate (step3)
+            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
+            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);
+
+            samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0);
+            samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0);
+            sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0);
+            sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0);
+            sum0 = vaddq_s32(sum0, samplePExt.val[0]);
+            sum1 = vaddq_s32(sum1, samplePExt.val[1]);
+            sum0 = vaddq_s32(sum0, sampleNExt.val[0]);
+            sum1 = vaddq_s32(sum1, sampleNExt.val[1]);
+        }
+        int32x2_t volumesV, outV;
+        volumesV = vld1_s32(mVolumeSIMD);
+        outV = vld1_s32(out);
+
+        //add all 4 partial sums
+        int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1;
+        sumLow0 = vget_low_s32(sum0);
+        sumHigh0 = vget_high_s32(sum0);
+        sumLow1 = vget_low_s32(sum1);
+        sumHigh1 = vget_high_s32(sum1);
+        sumLow0 = vpadd_s32(sumLow0, sumHigh0);
+        sumLow0 = vpadd_s32(sumLow0, sumLow0);
+        sumLow1 = vpadd_s32(sumLow1, sumHigh1);
+        sumLow1 = vpadd_s32(sumLow1, sumLow1);
+
+        sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0];
+        sumLow0 = vqrdmulh_s32(sumLow0, volumesV);
+        outV = vadd_s32(outV, sumLow0);
+        vst1_s32(out, outV);
     }
+#endif
 }
 
 template<int CHANNELS>
diff --git a/services/audioflinger/Tracks.cpp b/services/audioflinger/Tracks.cpp
index fccc7b8..b59333f 100644
--- a/services/audioflinger/Tracks.cpp
+++ b/services/audioflinger/Tracks.cpp
@@ -895,7 +895,7 @@ bool AudioFlinger::PlaybackThread::Track::presentationComplete(size_t framesWrit
 
 void AudioFlinger::PlaybackThread::Track::triggerEvents(AudioSystem::sync_event_t type)
 {
-    for (int i = 0; i < (int)mSyncEvents.size(); i++) {
+    for (size_t i = 0; i < mSyncEvents.size(); i++) {
         if (mSyncEvents[i]->type() == type) {
             mSyncEvents[i]->trigger();
             mSyncEvents.removeAt(i);
diff --git a/services/camera/libcameraservice/device3/Camera3Device.cpp b/services/camera/libcameraservice/device3/Camera3Device.cpp
index 7e11a3b..1d4768c 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.cpp
+++ b/services/camera/libcameraservice/device3/Camera3Device.cpp
@@ -654,6 +654,8 @@ status_t Camera3Device::createZslStream(
     }
     mInputStream = newStream;
 
+    mNeedConfig = true;
+
     *id = mNextStreamId++;
     *zslStream = newStream;