summaryrefslogtreecommitdiffstats
path: root/services/audioflinger/AudioResamplerSinc.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'services/audioflinger/AudioResamplerSinc.cpp')
-rw-r--r--services/audioflinger/AudioResamplerSinc.cpp255
1 files changed, 204 insertions, 51 deletions
diff --git a/services/audioflinger/AudioResamplerSinc.cpp b/services/audioflinger/AudioResamplerSinc.cpp
index 8d9168b..7d3681c 100644
--- a/services/audioflinger/AudioResamplerSinc.cpp
+++ b/services/audioflinger/AudioResamplerSinc.cpp
@@ -17,6 +17,7 @@
#define LOG_TAG "AudioResamplerSinc"
//#define LOG_NDEBUG 0
+#include <malloc.h>
#include <string.h>
#include <stdlib.h>
#include <dlfcn.h>
@@ -29,6 +30,20 @@
#include "AudioResamplerSinc.h"
+
+#if defined(__arm__) && !defined(__thumb__)
+#define USE_INLINE_ASSEMBLY (true)
+#else
+#define USE_INLINE_ASSEMBLY (false)
+#endif
+
+#if USE_INLINE_ASSEMBLY && defined(__ARM_NEON__)
+#define USE_NEON (true)
+#else
+#define USE_NEON (false)
+#endif
+
+
namespace android {
// ----------------------------------------------------------------------------
@@ -338,12 +353,16 @@ void AudioResamplerSinc::init_routine()
return;
}
- readResampleCoefficients = (readCoefficientsFn) dlsym(resampleCoeffLib,
- "readResamplerCoefficients");
- readResampleFirNumCoeffFn readResampleFirNumCoeff = (readResampleFirNumCoeffFn)
+ readResampleFirNumCoeffFn readResampleFirNumCoeff;
+ readResampleFirLerpIntBitsFn readResampleFirLerpIntBits;
+
+ readResampleCoefficients = (readCoefficientsFn)
+ dlsym(resampleCoeffLib, "readResamplerCoefficients");
+ readResampleFirNumCoeff = (readResampleFirNumCoeffFn)
dlsym(resampleCoeffLib, "readResampleFirNumCoeff");
- readResampleFirLerpIntBitsFn readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn)
+ readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn)
dlsym(resampleCoeffLib, "readResampleFirLerpIntBits");
+
if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) {
readResampleCoefficients = NULL;
dlclose(resampleCoeffLib);
@@ -353,15 +372,14 @@ void AudioResamplerSinc::init_routine()
}
c = &veryHighQualityConstants;
- // we have 16 coefs samples per zero-crossing
c->coefsBits = readResampleFirLerpIntBits();
- ALOGV("coefsBits = %d", c->coefsBits);
c->cShift = kNumPhaseBits - c->coefsBits;
c->cMask = ((1<<c->coefsBits)-1) << c->cShift;
c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
c->pMask = ((1<<pLerpBits)-1) << c->pShift;
// number of zero-crossing on each side
c->halfNumCoefs = readResampleFirNumCoeff();
+ ALOGV("coefsBits = %d", c->coefsBits);
ALOGV("halfNumCoefs = %d", c->halfNumCoefs);
// note that we "leak" resampleCoeffLib until the process exits
}
@@ -371,7 +389,7 @@ void AudioResamplerSinc::init_routine()
static inline
int32_t mulRL(int left, int32_t in, uint32_t vRL)
{
-#if defined(__arm__) && !defined(__thumb__)
+#if USE_INLINE_ASSEMBLY
int32_t out;
if (left) {
asm( "smultb %[out], %[in], %[vRL] \n"
@@ -394,7 +412,7 @@ int32_t mulRL(int left, int32_t in, uint32_t vRL)
static inline
int32_t mulAdd(int16_t in, int32_t v, int32_t a)
{
-#if defined(__arm__) && !defined(__thumb__)
+#if USE_INLINE_ASSEMBLY
int32_t out;
asm( "smlawb %[out], %[v], %[in], %[a] \n"
: [out]"=r"(out)
@@ -409,7 +427,7 @@ int32_t mulAdd(int16_t in, int32_t v, int32_t a)
static inline
int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
{
-#if defined(__arm__) && !defined(__thumb__)
+#if USE_INLINE_ASSEMBLY
int32_t out;
if (left) {
asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
@@ -434,7 +452,7 @@ int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
AudioResamplerSinc::AudioResamplerSinc(int bitDepth,
int inChannelCount, int32_t sampleRate, src_quality quality)
: AudioResampler(bitDepth, inChannelCount, sampleRate, quality),
- mState(0)
+ mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0)
{
/*
* Layout of the state buffer for 32 tap:
@@ -457,39 +475,34 @@ AudioResamplerSinc::AudioResamplerSinc(int bitDepth,
if (ok != 0) {
ALOGE("%s pthread_once failed: %d", __func__, ok);
}
- mConstants = (quality == VERY_HIGH_QUALITY) ? &veryHighQualityConstants : &highQualityConstants;
+ mConstants = (quality == VERY_HIGH_QUALITY) ?
+ &veryHighQualityConstants : &highQualityConstants;
}
-AudioResamplerSinc::~AudioResamplerSinc()
-{
- delete[] mState;
+AudioResamplerSinc::~AudioResamplerSinc() {
+ free(mState);
}
void AudioResamplerSinc::init() {
- const Constants *c = mConstants;
-
- const size_t numCoefs = 2*c->halfNumCoefs;
+ const Constants& c(*mConstants);
+ const size_t numCoefs = 2 * c.halfNumCoefs;
const size_t stateSize = numCoefs * mChannelCount * 2;
- mState = new int16_t[stateSize];
+ mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t));
memset(mState, 0, sizeof(int16_t)*stateSize);
- mImpulse = mState + (c->halfNumCoefs-1)*mChannelCount;
+ mImpulse = mState + (c.halfNumCoefs-1)*mChannelCount;
mRingFull = mImpulse + (numCoefs+1)*mChannelCount;
}
void AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
AudioBufferProvider* provider)
{
-
// FIXME store current state (up or down sample) and only load the coefs when the state
// changes. Or load two pointers one for up and one for down in the init function.
// Not critical now since the read functions are fast, but would be important if read was slow.
if (mConstants == &veryHighQualityConstants && readResampleCoefficients) {
- ALOGV("get coefficient from libmm-audio resampler library");
- mFirCoefs = (mInSampleRate <= mSampleRate) ? readResampleCoefficients(true) :
- readResampleCoefficients(false);
+ mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate );
} else {
- ALOGV("Use default coefficients");
mFirCoefs = (mInSampleRate <= mSampleRate) ? mFirCoefsUp : mFirCoefsDown;
}
@@ -502,7 +515,6 @@ void AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
resample<2>(out, outFrameCount, provider);
break;
}
-
}
@@ -510,7 +522,8 @@ template<int CHANNELS>
void AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
AudioBufferProvider* provider)
{
- const Constants *c = mConstants;
+ const Constants& c(*mConstants);
+ const size_t headOffset = c.halfNumCoefs*CHANNELS;
int16_t* impulse = mImpulse;
uint32_t vRL = mVolumeRL;
size_t inputIndex = mInputIndex;
@@ -545,11 +558,11 @@ void AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
}
}
}
- int16_t *in = mBuffer.i16;
+ int16_t const * const in = mBuffer.i16;
const size_t frameCount = mBuffer.frameCount;
// Always read-in the first samples from the input buffer
- int16_t* head = impulse + c->halfNumCoefs*CHANNELS;
+ int16_t* head = impulse + headOffset;
for (size_t i=0 ; i<CHANNELS ; i++) {
head[i] = in[inputIndex*CHANNELS + i];
}
@@ -597,16 +610,17 @@ void AudioResamplerSinc::read(
int16_t*& impulse, uint32_t& phaseFraction,
const int16_t* in, size_t inputIndex)
{
- const Constants *c = mConstants;
- const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits;
impulse += CHANNELS;
phaseFraction -= 1LU<<kNumPhaseBits;
+
+ const Constants& c(*mConstants);
if (CC_UNLIKELY(impulse >= mRingFull)) {
- const size_t stateSize = (c->halfNumCoefs*2)*CHANNELS;
+ const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS;
memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize);
impulse -= stateSize;
}
- int16_t* head = impulse + c->halfNumCoefs*CHANNELS;
+
+ int16_t* head = impulse + c.halfNumCoefs*CHANNELS;
for (size_t i=0 ; i<CHANNELS ; i++) {
head[i] = in[inputIndex*CHANNELS + i];
}
@@ -616,39 +630,178 @@ template<int CHANNELS>
void AudioResamplerSinc::filterCoefficient(
int32_t& l, int32_t& r, uint32_t phase, const int16_t *samples, uint32_t vRL)
{
- const Constants *c = mConstants;
-
// compute the index of the coefficient on the positive side and
// negative side
- uint32_t indexP = ( phase & c->cMask) >> c->cShift;
- uint32_t indexN = (-phase & c->cMask) >> c->cShift;
- uint32_t lerpP = ( phase & c->pMask) >> c->pShift;
- uint32_t lerpN = (-phase & c->pMask) >> c->pShift;
+ const Constants& c(*mConstants);
+ uint32_t indexP = ( phase & c.cMask) >> c.cShift;
+ uint32_t indexN = (-phase & c.cMask) >> c.cShift;
+ uint32_t lerpP = ( phase & c.pMask) >> c.pShift;
+ uint32_t lerpN = (-phase & c.pMask) >> c.pShift;
if ((indexP == 0) && (lerpP == 0)) {
- indexN = c->cMask >> c->cShift;
- lerpN = c->pMask >> c->pShift;
+ indexN = c.cMask >> c.cShift;
+ lerpN = c.pMask >> c.pShift;
}
- const size_t offset = c->halfNumCoefs;
+ const size_t offset = c.halfNumCoefs;
indexP *= offset;
indexN *= offset;
- int32_t const* const coefs = mFirCoefs;
- int32_t const* coefsP = coefs + indexP;
- int32_t const* coefsN = coefs + indexN;
+ int32_t const* coefsP = mFirCoefs + indexP;
+ int32_t const* coefsN = mFirCoefs + indexN;
int16_t const* sP = samples;
int16_t const* sN = samples + CHANNELS;
l = 0;
r = 0;
size_t count = offset;
- for (size_t i=0 ; i<count ; i++) {
- interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
- sP -= CHANNELS;
- interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
- sN += CHANNELS;
+
+ if (!USE_NEON) {
+ for (size_t i=0 ; i<count ; i++) {
+ interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
+ sP -= CHANNELS;
+ interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
+ sN += CHANNELS;
+ }
+ l = 2 * mulRL(1, l, vRL);
+ r = 2 * mulRL(0, r, vRL);
+ } else if (CHANNELS == 1) {
+ int32_t const* coefsP1 = coefsP + offset;
+ int32_t const* coefsN1 = coefsN + offset;
+ sP -= CHANNELS*3;
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n" // load the positive phase
+ "vmov.32 d2[1], %[lerpN] \n" // load the negative phase
+ "veor q0, q0 \n" // result, initialize to 0
+
+ "1: \n"
+ "vld1.16 { d4}, [%[sP]] \n" // load 4 16-bits mono samples
+ "vld1.32 { q8}, [%[coefsP0]]! \n" // load 4 32-bits coefs
+ "vld1.32 { q9}, [%[coefsP1]]! \n" // load 4 32-bits coefs for interpolation
+ "vld1.16 { d6}, [%[sN]]! \n" // load 4 16-bits mono samples
+ "vld1.32 {q10}, [%[coefsN0]]! \n" // load 4 32-bits coefs
+ "vld1.32 {q11}, [%[coefsN1]]! \n" // load 4 32-bits coefs for interpolation
+
+ "vrev64.16 d4, d4 \n" // reverse the 4 samples of the positive side
+
+ "vsub.s32 q9, q9, q8 \n" // interpolate (step1) 1st set of coefs
+ "vsub.s32 q11, q11, q10 \n" // interpolate (step1) 2nd set of coefs
+ "vshll.s16 q12, d4, #15 \n" // extend samples to 31 bits
+
+ "vqrdmulh.s32 q9, q9, d2[0] \n" // interpolate (step2) 1st set of coefs
+ "vqrdmulh.s32 q11, q11, d2[1] \n" // interpolate (step2) 2nd set of coefs
+ "vshll.s16 q14, d6, #15 \n" // extend samples to 31 bits
+
+ "vadd.s32 q8, q8, q9 \n" // interpolate (step3) 1st set
+ "vadd.s32 q10, q10, q11 \n" // interpolate (step3) 2nd set
+ "subs %[count], %[count], #4 \n" // update loop counter
+
+ "vqrdmulh.s32 q12, q12, q8 \n" // multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n" // multiply samples by interpolated coef
+ "sub %[sP], %[sP], #8 \n" // move pointer to next set of samples
+
+ "vadd.s32 q0, q0, q12 \n" // accumulate result
+ "vadd.s32 q0, q0, q14 \n" // accumulate result
+
+ "bne 1b \n" // loop
+
+ "vpadd.s32 d0, d0, d1 \n" // add all 4 partial sums
+ "vpadd.s32 d0, d0, d0 \n" // together
+
+ "vmov.s32 %[l], d0[0] \n" // save result in ARM register
+
+ : [l] "=r" (l),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN0] "+r" (coefsN),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP<<16),
+ [lerpN] "r" (lerpN<<16),
+ [vRL] "r" (vRL)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11",
+ "q12", "q14"
+ );
+ l = 2 * mulRL(1, l, vRL);
+ r = l;
+ } else if (CHANNELS == 2) {
+ int32_t const* coefsP1 = coefsP + offset;
+ int32_t const* coefsN1 = coefsN + offset;
+ sP -= CHANNELS*3;
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n" // load the positive phase
+ "vmov.32 d2[1], %[lerpN] \n" // load the negative phase
+ "veor q0, q0 \n" // result, initialize to 0
+ "veor q4, q4 \n" // result, initialize to 0
+
+ "1: \n"
+ "vld2.16 {d4,d5}, [%[sP]] \n" // load 4 16-bits stereo samples
+ "vld1.32 { q8}, [%[coefsP0]]! \n" // load 4 32-bits coefs
+ "vld1.32 { q9}, [%[coefsP1]]! \n" // load 4 32-bits coefs for interpolation
+ "vld2.16 {d6,d7}, [%[sN]]! \n" // load 4 16-bits stereo samples
+ "vld1.32 {q10}, [%[coefsN0]]! \n" // load 4 32-bits coefs
+ "vld1.32 {q11}, [%[coefsN1]]! \n" // load 4 32-bits coefs for interpolation
+
+ "vrev64.16 d4, d4 \n" // reverse the 4 samples of the positive side (1st de-interleaved channel)
+ "vrev64.16 d5, d5 \n" // reverse the 4 samples of the positive side (2nd de-interleaved channel)
+
+ "vsub.s32 q9, q9, q8 \n" // interpolate (step1) 1st set of coefs
+ "vsub.s32 q11, q11, q10 \n" // interpolate (step1) 2nd set of coefs
+ "vshll.s16 q12, d4, #15 \n" // extend samples to 31 bits
+ "vshll.s16 q13, d5, #15 \n" // extend samples to 31 bits
+
+ "vqrdmulh.s32 q9, q9, d2[0] \n" // interpolate (step2) 1st set of coefs
+ "vqrdmulh.s32 q11, q11, d2[1] \n" // interpolate (step2) 2nd set of coefs
+ "vshll.s16 q14, d6, #15 \n" // extend samples to 31 bits
+ "vshll.s16 q15, d7, #15 \n" // extend samples to 31 bits
+
+ "vadd.s32 q8, q8, q9 \n" // interpolate (step3) 1st set
+ "vadd.s32 q10, q10, q11 \n" // interpolate (step3) 2nd set
+ "subs %[count], %[count], #4 \n" // update loop counter
+
+ "vqrdmulh.s32 q12, q12, q8 \n" // multiply samples by interpolated coef
+ "vqrdmulh.s32 q13, q13, q8 \n" // multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n" // multiply samples by interpolated coef
+ "vqrdmulh.s32 q15, q15, q10 \n" // multiply samples by interpolated coef
+ "sub %[sP], %[sP], #16 \n" // move pointer to next set of samples
+
+ "vadd.s32 q0, q0, q12 \n" // accumulate result
+ "vadd.s32 q4, q4, q13 \n" // accumulate result
+ "vadd.s32 q0, q0, q14 \n" // accumulate result
+ "vadd.s32 q4, q4, q15 \n" // accumulate result
+
+ "bne 1b \n" // loop
+
+ "vpadd.s32 d0, d0, d1 \n" // add all 4 partial sums
+ "vpadd.s32 d8, d8, d9 \n" // add all 4 partial sums
+ "vpadd.s32 d0, d0, d0 \n" // together
+ "vpadd.s32 d8, d8, d8 \n" // together
+
+ "vmov.s32 %[l], d0[0] \n" // save result in ARM register
+ "vmov.s32 %[r], d8[0] \n" // save result in ARM register
+
+ : [l] "=r" (l),
+ [r] "=r" (r),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN0] "+r" (coefsN),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP<<16),
+ [lerpN] "r" (lerpN<<16),
+ [vRL] "r" (vRL)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3", "q4",
+ "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15"
+ );
+ l = 2 * mulRL(1, l, vRL);
+ r = 2 * mulRL(0, r, vRL);
}
- l = 2 * mulRL(1, l, vRL);
- r = 2 * mulRL(0, r, vRL);
}
template<int CHANNELS>