3 files changed, 96 insertions, 65 deletions
diff --git a/services/audioflinger/AudioMixer.cpp b/services/audioflinger/AudioMixer.cpp
index 0d4b358..836f550 100644
--- a/services/audioflinger/AudioMixer.cpp
+++ b/services/audioflinger/AudioMixer.cpp
@@ -341,11 +341,46 @@ AudioMixer::RemixBufferProvider::RemixBufferProvider(audio_channel_mask_t inputC
     ALOGV("RemixBufferProvider(%p)(%#x, %#x, %#x) %zu %zu",
             this, format, inputChannelMask, outputChannelMask,
             mInputChannels, mOutputChannels);
-    // TODO: consider channel representation in index array formulation
-    // We ignore channel representation, and just use the bits.
-    memcpy_by_index_array_initialization(mIdxAry, ARRAY_SIZE(mIdxAry),
-            audio_channel_mask_get_bits(outputChannelMask),
-            audio_channel_mask_get_bits(inputChannelMask));
+
+    const audio_channel_representation_t inputRepresentation =
+            audio_channel_mask_get_representation(inputChannelMask);
+    const audio_channel_representation_t outputRepresentation =
+            audio_channel_mask_get_representation(outputChannelMask);
+    const uint32_t inputBits = audio_channel_mask_get_bits(inputChannelMask);
+    const uint32_t outputBits = audio_channel_mask_get_bits(outputChannelMask);
+
+    switch (inputRepresentation) {
+    case AUDIO_CHANNEL_REPRESENTATION_POSITION:
+        switch (outputRepresentation) {
+        case AUDIO_CHANNEL_REPRESENTATION_POSITION:
+            memcpy_by_index_array_initialization(mIdxAry, ARRAY_SIZE(mIdxAry),
+                    outputBits, inputBits);
+            return;
+        case AUDIO_CHANNEL_REPRESENTATION_INDEX:
+            // TODO: output channel index mask not currently allowed
+            // fall through
+        default:
+            break;
+        }
+        break;
+    case AUDIO_CHANNEL_REPRESENTATION_INDEX:
+        switch (outputRepresentation) {
+        case AUDIO_CHANNEL_REPRESENTATION_POSITION:
+            memcpy_by_index_array_initialization_src_index(mIdxAry, ARRAY_SIZE(mIdxAry),
+                    outputBits, inputBits);
+            return;
+        case AUDIO_CHANNEL_REPRESENTATION_INDEX:
+            // TODO: output channel index mask not currently allowed
+            // fall through
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+    LOG_ALWAYS_FATAL("invalid channel mask conversion from %#x to %#x",
+            inputChannelMask, outputChannelMask);
 }
 
 void AudioMixer::RemixBufferProvider::copyFrames(void *dst, const void *src, size_t frames)
@@ -605,7 +640,10 @@ status_t AudioMixer::track_t::prepareForDownmix()
                     && mMixerChannelMask == AUDIO_CHANNEL_OUT_STEREO)) {
         return NO_ERROR;
     }
-    if (DownmixerBufferProvider::isMultichannelCapable()) {
+    // DownmixerBufferProvider is only used for position masks.
+    if (audio_channel_mask_get_representation(channelMask)
+                == AUDIO_CHANNEL_REPRESENTATION_POSITION
+            && DownmixerBufferProvider::isMultichannelCapable()) {
         DownmixerBufferProvider* pDbp = new DownmixerBufferProvider(channelMask,
                 mMixerChannelMask,
                 AUDIO_FORMAT_PCM_16_BIT /* TODO: use mMixerInFormat, now only PCM 16 */,
diff --git a/services/audioflinger/AudioResamplerFirProcessNeon.h b/services/audioflinger/AudioResamplerFirProcessNeon.h
index d4fa7ad..29ff179 100644
--- a/services/audioflinger/AudioResamplerFirProcessNeon.h
+++ b/services/audioflinger/AudioResamplerFirProcessNeon.h
@@ -115,13 +115,13 @@ inline void ProcessL<2, 16>(int32_t* const out,
 
         "1:                                      \n"
 
-        "vld2.16        {q2, q3}, [%[sP]]        \n"// (3+0d) load 8 16-bits stereo samples
-        "vld2.16        {q5, q6}, [%[sN]]!       \n"// (3) load 8 16-bits stereo samples
+        "vld2.16        {q2, q3}, [%[sP]]        \n"// (3+0d) load 8 16-bits stereo frames
+        "vld2.16        {q5, q6}, [%[sN]]!       \n"// (3) load 8 16-bits stereo frames
         "vld1.16        {q8}, [%[coefsP0]:128]!  \n"// (1) load 8 16-bits coefs
         "vld1.16        {q10}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs
 
-        "vrev64.16      q2, q2                   \n"// (1) reverse 8 frames of the left positive
-        "vrev64.16      q3, q3                   \n"// (0 combines+) reverse right positive
+        "vrev64.16      q2, q2                   \n"// (1) reverse 8 samples of positive left
+        "vrev64.16      q3, q3                   \n"// (0 combines+) reverse positive right
 
         "vmlal.s16      q0, d4, d17              \n"// (1) multiply (reversed) samples left
         "vmlal.s16      q0, d5, d16              \n"// (1) multiply (reversed) samples left
@@ -247,8 +247,8 @@ inline void Process<2, 16>(int32_t* const out,
 
         "1:                                      \n"
 
-        "vld2.16        {q2, q3}, [%[sP]]        \n"// (3+0d) load 8 16-bits stereo samples
-        "vld2.16        {q5, q6}, [%[sN]]!       \n"// (3) load 8 16-bits stereo samples
+        "vld2.16        {q2, q3}, [%[sP]]        \n"// (3+0d) load 8 16-bits stereo frames
+        "vld2.16        {q5, q6}, [%[sN]]!       \n"// (3) load 8 16-bits stereo frames
         "vld1.16        {q8}, [%[coefsP0]:128]!  \n"// (1) load 8 16-bits coefs
         "vld1.16        {q9}, [%[coefsP1]:128]!  \n"// (1) load 8 16-bits coefs for interpolation
         "vld1.16        {q10}, [%[coefsN1]:128]! \n"// (1) load 8 16-bits coefs
@@ -260,8 +260,8 @@ inline void Process<2, 16>(int32_t* const out,
         "vqrdmulh.s16   q9, q9, d2[0]            \n"// (2) interpolate (step2) 1st set of coefs
         "vqrdmulh.s16   q11, q11, d2[0]          \n"// (2) interpolate (step2) 2nd set of coefs
 
-        "vrev64.16      q2, q2                   \n"// (1) reverse 8 frames of the left positive
-        "vrev64.16      q3, q3                   \n"// (1) reverse 8 frames of the right positive
+        "vrev64.16      q2, q2                   \n"// (1) reverse 8 samples of positive left
+        "vrev64.16      q3, q3                   \n"// (1) reverse 8 samples of positive right
 
         "vadd.s16       q8, q8, q9               \n"// (1+1d) interpolate (step3) 1st set
         "vadd.s16       q10, q10, q11            \n"// (1+1d) interpolate (step3) 2nd set
@@ -323,7 +323,7 @@ inline void ProcessL<1, 16>(int32_t* const out,
         "vld1.32        {q8, q9}, [%[coefsP0]:128]!   \n"// load 8 32-bits coefs
         "vld1.32        {q10, q11}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs
 
-        "vrev64.16      q2, q2                        \n"// reverse 8 frames of the positive side
+        "vrev64.16      q2, q2                        \n"// reverse 8 samples of the positive side
 
         "vshll.s16      q12, d4, #15                  \n"// extend samples to 31 bits
         "vshll.s16      q13, d5, #15                  \n"// extend samples to 31 bits
@@ -331,10 +331,10 @@ inline void ProcessL<1, 16>(int32_t* const out,
         "vshll.s16      q14, d6, #15                  \n"// extend samples to 31 bits
         "vshll.s16      q15, d7, #15                  \n"// extend samples to 31 bits
 
-        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by interpolated coef
+        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples
+        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples
+        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples
+        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples
 
         "vadd.s32       q0, q0, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
@@ -380,13 +380,13 @@ inline void ProcessL<2, 16>(int32_t* const out,
 
         "1:                                           \n"
 
-        "vld2.16        {q2, q3}, [%[sP]]             \n"// load 4 16-bits stereo samples
-        "vld2.16        {q5, q6}, [%[sN]]!            \n"// load 4 16-bits stereo samples
-        "vld1.32        {q8, q9}, [%[coefsP0]:128]!   \n"// load 4 32-bits coefs
-        "vld1.32        {q10, q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs
+        "vld2.16        {q2, q3}, [%[sP]]             \n"// load 8 16-bits stereo frames
+        "vld2.16        {q5, q6}, [%[sN]]!            \n"// load 8 16-bits stereo frames
+        "vld1.32        {q8, q9}, [%[coefsP0]:128]!   \n"// load 8 32-bits coefs
+        "vld1.32        {q10, q11}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs
 
-        "vrev64.16      q2, q2                        \n"// reverse 8 frames of the positive side
-        "vrev64.16      q3, q3                        \n"// reverse 8 frames of the positive side
+        "vrev64.16      q2, q2                        \n"// reverse 8 samples of positive left
+        "vrev64.16      q3, q3                        \n"// reverse 8 samples of positive right
 
         "vshll.s16      q12,  d4, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d5, #15                 \n"// extend samples to 31 bits
@@ -394,15 +394,15 @@ inline void ProcessL<2, 16>(int32_t* const out,
         "vshll.s16      q14,  d10, #15                \n"// extend samples to 31 bits
         "vshll.s16      q15,  d11, #15                \n"// extend samples to 31 bits
 
-        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by interpolated coef
+        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by coef
+        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by coef
+        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by coef
+        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by coef
 
         "vadd.s32       q0, q0, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
-        "vadd.s32       q0, q0, q15                   \n"// (+1) accumulate result
-        "vadd.s32       q0, q0, q13                   \n"// (+1) accumulate result
+        "vadd.s32       q0, q0, q15                   \n"// accumulate result
+        "vadd.s32       q0, q0, q13                   \n"// accumulate result
 
         "vshll.s16      q12,  d6, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d7, #15                 \n"// extend samples to 31 bits
@@ -410,15 +410,15 @@ inline void ProcessL<2, 16>(int32_t* const out,
         "vshll.s16      q14,  d12, #15                \n"// extend samples to 31 bits
         "vshll.s16      q15,  d13, #15                \n"// extend samples to 31 bits
 
-        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by interpolated coef
-        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by interpolated coef
+        "vqrdmulh.s32   q12, q12, q9                  \n"// multiply samples by coef
+        "vqrdmulh.s32   q13, q13, q8                  \n"// multiply samples by coef
+        "vqrdmulh.s32   q14, q14, q10                 \n"// multiply samples by coef
+        "vqrdmulh.s32   q15, q15, q11                 \n"// multiply samples by coef
 
         "vadd.s32       q4, q4, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
-        "vadd.s32       q4, q4, q15                   \n"// (+1) accumulate result
-        "vadd.s32       q4, q4, q13                   \n"// (+1) accumulate result
+        "vadd.s32       q4, q4, q15                   \n"// accumulate result
+        "vadd.s32       q4, q4, q13                   \n"// accumulate result
 
         "subs           %[count], %[count], #8        \n"// update loop counter
         "sub            %[sP], %[sP], #32             \n"// move pointer to next set of samples
@@ -485,7 +485,7 @@ inline void Process<1, 16>(int32_t* const out,
         "vadd.s32       q10, q10, q14                 \n"// interpolate (step3)
         "vadd.s32       q11, q11, q15                 \n"// interpolate (step3)
 
-        "vrev64.16      q2, q2                        \n"// reverse 8 frames of the positive side
+        "vrev64.16      q2, q2                        \n"// reverse 8 samples of the positive side
 
         "vshll.s16      q12,  d4, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d5, #15                 \n"// extend samples to 31 bits
@@ -549,8 +549,8 @@ inline void Process<2, 16>(int32_t* const out,
 
         "1:                                           \n"
 
-        "vld2.16        {q2, q3}, [%[sP]]             \n"// load 4 16-bits stereo samples
-        "vld2.16        {q5, q6}, [%[sN]]!            \n"// load 4 16-bits stereo samples
+        "vld2.16        {q2, q3}, [%[sP]]             \n"// load 8 16-bits stereo frames
+        "vld2.16        {q5, q6}, [%[sN]]!            \n"// load 8 16-bits stereo frames
         "vld1.32        {q8, q9}, [%[coefsP0]:128]!   \n"// load 8 32-bits coefs
         "vld1.32        {q12, q13}, [%[coefsP1]:128]! \n"// load 8 32-bits coefs
         "vld1.32        {q10, q11}, [%[coefsN1]:128]! \n"// load 8 32-bits coefs
@@ -571,8 +571,8 @@ inline void Process<2, 16>(int32_t* const out,
         "vadd.s32       q10, q10, q14                 \n"// interpolate (step3)
         "vadd.s32       q11, q11, q15                 \n"// interpolate (step3)
 
-        "vrev64.16      q2, q2                        \n"// reverse 8 frames of the positive side
-        "vrev64.16      q3, q3                        \n"// reverse 8 frames of the positive side
+        "vrev64.16      q2, q2                        \n"// reverse 8 samples of positive left
+        "vrev64.16      q3, q3                        \n"// reverse 8 samples of positive right
 
         "vshll.s16      q12,  d4, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d5, #15                 \n"// extend samples to 31 bits
@@ -587,8 +587,8 @@ inline void Process<2, 16>(int32_t* const out,
 
         "vadd.s32       q0, q0, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
-        "vadd.s32       q0, q0, q15                   \n"// (+1) accumulate result
-        "vadd.s32       q0, q0, q13                   \n"// (+1) accumulate result
+        "vadd.s32       q0, q0, q15                   \n"// accumulate result
+        "vadd.s32       q0, q0, q13                   \n"// accumulate result
 
         "vshll.s16      q12,  d6, #15                 \n"// extend samples to 31 bits
         "vshll.s16      q13,  d7, #15                 \n"// extend samples to 31 bits
@@ -603,8 +603,8 @@ inline void Process<2, 16>(int32_t* const out,
 
         "vadd.s32       q4, q4, q12                   \n"// accumulate result
         "vadd.s32       q13, q13, q14                 \n"// accumulate result
-        "vadd.s32       q4, q4, q15                   \n"// (+1) accumulate result
-        "vadd.s32       q4, q4, q13                   \n"// (+1) accumulate result
+        "vadd.s32       q4, q4, q15                   \n"// accumulate result
+        "vadd.s32       q4, q4, q13                   \n"// accumulate result
 
         "subs           %[count], %[count], #8        \n"// update loop counter
         "sub            %[sP], %[sP], #32             \n"// move pointer to next set of samples
diff --git a/services/audioflinger/Threads.cpp b/services/audioflinger/Threads.cpp
index 384bd25..40ab0af 100644
--- a/services/audioflinger/Threads.cpp
+++ b/services/audioflinger/Threads.cpp
@@ -174,18 +174,6 @@ static int sFastTrackMultiplier = kFastTrackMultiplier;
 // and that all "fast" AudioRecord clients read from.  In either case, the size can be small.
 static const size_t kRecordThreadReadOnlyHeapSize = 0x2000;
 
-// Returns the source frames needed to resample to destination frames.  This is not a precise
-// value and depends on the resampler (and possibly how it handles rounding internally).
-// If srcSampleRate and dstSampleRate are equal, then it returns destination frames, which
-// may not be a true if the resampler is asynchronous.
-static inline size_t sourceFramesNeeded(
-        uint32_t srcSampleRate, size_t dstFramesRequired, uint32_t dstSampleRate) {
-    // +1 for rounding - always do this even if matched ratio
-    // +1 for additional sample needed for interpolation
-    return srcSampleRate == dstSampleRate ? dstFramesRequired :
-            size_t((uint64_t)dstFramesRequired * srcSampleRate / dstSampleRate + 1 + 1);
-}
-
 // ----------------------------------------------------------------------------
 
 static pthread_once_t sFastTrackMultiplierOnce = PTHREAD_ONCE_INIT;
@@ -1497,20 +1485,25 @@ sp<AudioFlinger::PlaybackThread::Track> AudioFlinger::PlaybackThread::createTrac
                 audio_is_linear_pcm(format),
                 channelMask, sampleRate, mSampleRate, hasFastMixer(), tid, mFastTrackAvailMask);
         *flags &= ~IAudioFlinger::TRACK_FAST;
-        // For compatibility with AudioTrack calculation, buffer depth is forced
-        // to be at least 2 x the normal mixer frame count and cover audio hardware latency.
-        // This is probably too conservative, but legacy application code may depend on it.
-        // If you change this calculation, also review the start threshold which is related.
+      }
+    }
+    // For normal PCM streaming tracks, update minimum frame count.
+    // For compatibility with AudioTrack calculation, buffer depth is forced
+    // to be at least 2 x the normal mixer frame count and cover audio hardware latency.
+    // This is probably too conservative, but legacy application code may depend on it.
+    // If you change this calculation, also review the start threshold which is related.
+    if (!(*flags & IAudioFlinger::TRACK_FAST)
+            && audio_is_linear_pcm(format) && sharedBuffer == 0) {
         uint32_t latencyMs = mOutput->stream->get_latency(mOutput->stream);
         uint32_t minBufCount = latencyMs / ((1000 * mNormalFrameCount) / mSampleRate);
         if (minBufCount < 2) {
             minBufCount = 2;
         }
-        size_t minFrameCount = mNormalFrameCount * minBufCount;
-        if (frameCount < minFrameCount) {
+        size_t minFrameCount =
+                minBufCount * sourceFramesNeeded(sampleRate, mNormalFrameCount, mSampleRate);
+        if (frameCount < minFrameCount) { // including frameCount == 0
             frameCount = minFrameCount;
         }
-      }
     }
     *pFrameCount = frameCount;