diff options
Diffstat (limited to 'services')
-rw-r--r-- | services/audioflinger/AudioResamplerFirProcess.h | 208 |
1 files changed, 146 insertions, 62 deletions
diff --git a/services/audioflinger/AudioResamplerFirProcess.h b/services/audioflinger/AudioResamplerFirProcess.h index 76d2d66..bb0f1c9 100644 --- a/services/audioflinger/AudioResamplerFirProcess.h +++ b/services/audioflinger/AudioResamplerFirProcess.h @@ -44,14 +44,14 @@ static inline void mac(float& l, float& r, TC coef, const float* samples) { l += *samples++ * coef; - r += *samples++ * coef; + r += *samples * coef; } template<typename TC> static inline void mac(float& l, TC coef, const float* samples) { - l += *samples++ * coef; + l += *samples * coef; } /* variant for output type TO = int32_t output samples */ @@ -69,62 +69,48 @@ float volumeAdjust(float value, float volume) } /* - * Calculates a single output frame (two samples). - * - * This function computes both the positive half FIR dot product and - * the negative half FIR dot product, accumulates, and then applies the volume. + * Helper template functions for loop unrolling accumulator operations. * - * This is a locked phase filter (it does not compute the interpolation). - * - * Use fir() to compute the proper coefficient pointers for a polyphase - * filter bank. + * Unrolling the loops achieves about 2x gain. + * Using a recursive template rather than an array of TO[] for the accumulator + * values is an additional 10-20% gain. */ -template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO> -static inline -void ProcessL(TO* const out, - int count, - const TC* coefsP, - const TC* coefsN, - const TI* sP, - const TI* sN, - const TO* const volumeLR) +template<int CHANNELS, typename TO> +class Accumulator : public Accumulator<CHANNELS-1, TO> // recursive { - COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2) - if (CHANNELS == 2) { - TO l = 0; - TO r = 0; - do { - mac(l, r, *coefsP++, sP); - sP -= CHANNELS; - mac(l, r, *coefsN++, sN); - sN += CHANNELS; - } while (--count > 0); - out[0] += volumeAdjust(l, volumeLR[0]); - out[1] += volumeAdjust(r, volumeLR[1]); - } else { /* CHANNELS == 1 */ - TO l = 0; - do { - mac(l, *coefsP++, sP); - sP -= CHANNELS; - mac(l, *coefsN++, sN); - sN += CHANNELS; - } while (--count > 0); - out[0] += volumeAdjust(l, volumeLR[0]); - out[1] += volumeAdjust(l, volumeLR[1]); +public: + inline void clear() { + value = 0; + Accumulator<CHANNELS-1, TO>::clear(); } -} + template<typename TC, typename TI> + inline void acc(TC coef, const TI*& data) { + mac(value, coef, data++); + Accumulator<CHANNELS-1, TO>::acc(coef, data); + } + inline void volume(TO*& out, TO gain) { + *out++ = volumeAdjust(value, gain); + Accumulator<CHANNELS-1, TO>::volume(out, gain); + } + + TO value; // one per recursive inherited base class +}; + +template<typename TO> +class Accumulator<0, TO> { +public: + inline void clear() { + } + template<typename TC, typename TI> + inline void acc(TC coef __unused, const TI*& data __unused) { + } + inline void volume(TO*& out __unused, TO gain __unused) { + } +}; /* - * Calculates a single output frame (two samples) interpolating phase. - * - * This function computes both the positive half FIR dot product and - * the negative half FIR dot product, accumulates, and then applies the volume. - * - * This is an interpolated phase filter. - * - * Use fir() to compute the proper coefficient pointers for a polyphase - * filter bank. + * Helper template functions for interpolating filter coefficients. */ template<typename TC, typename T> @@ -159,30 +145,98 @@ int32_t interpolate(int32_t coef_0, int32_t coef_1, uint32_t lerp) return mulAdd(static_cast<int16_t>(lerp), (coef_1-coef_0)<<1, coef_0); } -template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP> +/* class scope for passing in functions into templates */ +struct InterpCompute { + template<typename TC, typename TINTERP> + static inline + TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) { + return interpolate(coef_0, coef_1, lerp); + } + + template<typename TC, typename TINTERP> + static inline + TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) { + return interpolate(coef_0, coef_1, lerp); + } +}; + +struct InterpNull { + template<typename TC, typename TINTERP> + static inline + TC interpolatep(TC coef_0, TC coef_1 __unused, TINTERP lerp __unused) { + return coef_0; + } + + template<typename TC, typename TINTERP> + static inline + TC interpolaten(TC coef_0 __unused, TC coef_1, TINTERP lerp __unused) { + return coef_1; + } +}; + +/* + * Calculates a single output frame (two samples). + * + * The Process*() functions compute both the positive half FIR dot product and + * the negative half FIR dot product, accumulates, and then applies the volume. + * + * Use fir() to compute the proper coefficient pointers for a polyphase + * filter bank. + * + * ProcessBase() is the fundamental processing template function. + * + * ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase. + * Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase. + */ + +template <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO, typename TINTERP> static inline -void Process(TO* const out, +void ProcessBase(TO* const out, int count, const TC* coefsP, const TC* coefsN, - const TC* coefsP1 __unused, - const TC* coefsN1 __unused, const TI* sP, const TI* sN, TINTERP lerpP, const TO* const volumeLR) { - COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2) - adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolation + COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS > 0) - if (CHANNELS == 2) { + if (CHANNELS > 2) { + // TO accum[CHANNELS]; + Accumulator<CHANNELS, TO> accum; + + // for (int j = 0; j < CHANNELS; ++j) accum[j] = 0; + accum.clear(); + for (size_t i = 0; i < count; ++i) { + TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP); + + // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j); + const TI *tmp_data = sP; // tmp_ptr seems to work better + accum.acc(c, tmp_data); + + coefsP++; + sP -= CHANNELS; + c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP); + + // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j); + tmp_data = sN; // tmp_ptr seems faster than directly using sN + accum.acc(c, tmp_data); + + coefsN++; + sN += CHANNELS; + } + // for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]); + TO *tmp_out = out; // may remove if const out definition changes. + accum.volume(tmp_out, volumeLR[0]); + } else if (CHANNELS == 2) { TO l = 0; TO r = 0; for (size_t i = 0; i < count; ++i) { - mac(l, r, interpolate(coefsP[0], coefsP[count], lerpP), sP); + mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP); coefsP++; sP -= CHANNELS; - mac(l, r, interpolate(coefsN[count], coefsN[0], lerpP), sN); + mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN); coefsN++; sN += CHANNELS; } @@ -191,10 +245,10 @@ void Process(TO* const out, } else { /* CHANNELS == 1 */ TO l = 0; for (size_t i = 0; i < count; ++i) { - mac(l, interpolate(coefsP[0], coefsP[count], lerpP), sP); + mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP); coefsP++; sP -= CHANNELS; - mac(l, interpolate(coefsN[count], coefsN[0], lerpP), sN); + mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN); coefsN++; sN += CHANNELS; } @@ -203,6 +257,36 @@ void Process(TO* const out, } } +template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO> +static inline +void ProcessL(TO* const out, + int count, + const TC* coefsP, + const TC* coefsN, + const TI* sP, + const TI* sN, + const TO* const volumeLR) +{ + ProcessBase<CHANNELS, STRIDE, InterpNull>(out, count, coefsP, coefsN, sP, sN, 0, volumeLR); +} + +template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP> +static inline +void Process(TO* const out, + int count, + const TC* coefsP, + const TC* coefsN, + const TC* coefsP1 __unused, + const TC* coefsN1 __unused, + const TI* sP, + const TI* sN, + TINTERP lerpP, + const TO* const volumeLR) +{ + adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolations + ProcessBase<CHANNELS, STRIDE, InterpCompute>(out, count, coefsP, coefsN, sP, sN, lerpP, volumeLR); +} + /* * Calculates a single output frame (two samples) from input sample pointer. * |