services/audioflinger/AudioResamplerFirProcess.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333

/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H
#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H

namespace android {

// depends on AudioResamplerFirOps.h

/* variant for input type TI = int16_t input samples */
template<typename TC>
static inline
void mac(int32_t& l, int32_t& r, TC coef, const int16_t* samples)
{
    uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
    l = mulAddRL(1, rl, coef, l);
    r = mulAddRL(0, rl, coef, r);
}

template<typename TC>
static inline
void mac(int32_t& l, TC coef, const int16_t* samples)
{
    l = mulAdd(samples[0], coef, l);
}

/* variant for input type TI = float input samples */
template<typename TC>
static inline
void mac(float& l, float& r, TC coef,  const float* samples)
{
    l += *samples++ * coef;
    r += *samples++ * coef;
}

template<typename TC>
static inline
void mac(float& l, TC coef,  const float* samples)
{
    l += *samples++ * coef;
}

/* variant for output type TO = int32_t output samples */
static inline
int32_t volumeAdjust(int32_t value, int32_t volume)
{
    return 2 * mulRL(0, value, volume);  // Note: only use top 16b
}

/* variant for output type TO = float output samples */
static inline
float volumeAdjust(float value, float volume)
{
    return value * volume;
}

/*
 * Calculates a single output frame (two samples).
 *
 * This function computes both the positive half FIR dot product and
 * the negative half FIR dot product, accumulates, and then applies the volume.
 *
 * This is a locked phase filter (it does not compute the interpolation).
 *
 * Use fir() to compute the proper coefficient pointers for a polyphase
 * filter bank.
 */

template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>
static inline
void ProcessL(TO* const out,
        int count,
        const TC* coefsP,
        const TC* coefsN,
        const TI* sP,
        const TI* sN,
        const TO* const volumeLR)
{
    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2)
    if (CHANNELS == 2) {
        TO l = 0;
        TO r = 0;
        do {
            mac(l, r, *coefsP++, sP);
            sP -= CHANNELS;
            mac(l, r, *coefsN++, sN);
            sN += CHANNELS;
        } while (--count > 0);
        out[0] += volumeAdjust(l, volumeLR[0]);
        out[1] += volumeAdjust(r, volumeLR[1]);
    } else { /* CHANNELS == 1 */
        TO l = 0;
        do {
            mac(l, *coefsP++, sP);
            sP -= CHANNELS;
            mac(l, *coefsN++, sN);
            sN += CHANNELS;
        } while (--count > 0);
        out[0] += volumeAdjust(l, volumeLR[0]);
        out[1] += volumeAdjust(l, volumeLR[1]);
    }
}

/*
 * Calculates a single output frame (two samples) interpolating phase.
 *
 * This function computes both the positive half FIR dot product and
 * the negative half FIR dot product, accumulates, and then applies the volume.
 *
 * This is an interpolated phase filter.
 *
 * Use fir() to compute the proper coefficient pointers for a polyphase
 * filter bank.
 */

template<typename TC, typename T>
void adjustLerp(T& lerpP __unused)
{
}

template<int32_t, typename T>
void adjustLerp(T& lerpP)
{
    lerpP >>= 16;   // lerpP is 32bit for NEON int32_t, but always 16 bit for non-NEON path
}

template<typename TC, typename TINTERP>
static inline
TC interpolate(TC coef_0, TC coef_1, TINTERP lerp)
{
    return lerp * (coef_1 - coef_0) + coef_0;
}

template<int16_t, uint32_t>
static inline
int16_t interpolate(int16_t coef_0, int16_t coef_1, uint32_t lerp)
{
    return (static_cast<int16_t>(lerp) * ((coef_1-coef_0)<<1)>>16) + coef_0;
}

template<int32_t, uint32_t>
static inline
int32_t interpolate(int32_t coef_0, int32_t coef_1, uint32_t lerp)
{
    return mulAdd(static_cast<int16_t>(lerp), (coef_1-coef_0)<<1, coef_0);
}

template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP>
static inline
void Process(TO* const out,
        int count,
        const TC* coefsP,
        const TC* coefsN,
        const TC* coefsP1 __unused,
        const TC* coefsN1 __unused,
        const TI* sP,
        const TI* sN,
        TINTERP lerpP,
        const TO* const volumeLR)
{
    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2)
    adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolation

    if (CHANNELS == 2) {
        TO l = 0;
        TO r = 0;
        for (size_t i = 0; i < count; ++i) {
            mac(l, r, interpolate(coefsP[0], coefsP[count], lerpP), sP);
            coefsP++;
            sP -= CHANNELS;
            mac(l, r, interpolate(coefsN[count], coefsN[0], lerpP), sN);
            coefsN++;
            sN += CHANNELS;
        }
        out[0] += volumeAdjust(l, volumeLR[0]);
        out[1] += volumeAdjust(r, volumeLR[1]);
    } else { /* CHANNELS == 1 */
        TO l = 0;
        for (size_t i = 0; i < count; ++i) {
            mac(l, interpolate(coefsP[0], coefsP[count], lerpP), sP);
            coefsP++;
            sP -= CHANNELS;
            mac(l, interpolate(coefsN[count], coefsN[0], lerpP), sN);
            coefsN++;
            sN += CHANNELS;
        }
        out[0] += volumeAdjust(l, volumeLR[0]);
        out[1] += volumeAdjust(l, volumeLR[1]);
    }
}

/*
 * Calculates a single output frame (two samples) from input sample pointer.
 *
 * This sets up the params for the accelerated Process() and ProcessL()
 * functions to do the appropriate dot products.
 *
 * @param out should point to the output buffer with space for at least one output frame.
 *
 * @param phase is the fractional distance between input frames for interpolation:
 * phase >= 0  && phase < phaseWrapLimit.  It can be thought of as a rational fraction
 * of phase/phaseWrapLimit.
 *
 * @param phaseWrapLimit is #polyphases<<coefShift, where #polyphases is the number of polyphases
 * in the polyphase filter. Likewise, #polyphases can be obtained as (phaseWrapLimit>>coefShift).
 *
 * @param coefShift gives the bit alignment of the polyphase index in the phase parameter.
 *
 * @param halfNumCoefs is the half the number of coefficients per polyphase filter. Since the
 * overall filterbank is odd-length symmetric, only halfNumCoefs need be stored.
 *
 * @param coefs is the polyphase filter bank, starting at from polyphase index 0, and ranging to
 * and including the #polyphases.  Each polyphase of the filter has half-length halfNumCoefs
 * (due to symmetry).  The total size of the filter bank in coefficients is
 * (#polyphases+1)*halfNumCoefs.
 *
 * The filter bank coefs should be aligned to a minimum of 16 bytes (preferrably to cache line).
 *
 * The coefs should be attenuated (to compensate for passband ripple)
 * if storing back into the native format.
 *
 * @param samples are unaligned input samples.  The position is in the "middle" of the
 * sample array with respect to the FIR filter:
 * the negative half of the filter is dot product from samples+1 to samples+halfNumCoefs;
 * the positive half of the filter is dot product from samples to samples-halfNumCoefs+1.
 *
 * @param volumeLR is a pointer to an array of two 32 bit volume values, one per stereo channel,
 * expressed as a S32 integer.  A negative value inverts the channel 180 degrees.
 * The pointer volumeLR should be aligned to a minimum of 8 bytes.
 * A typical value for volume is 0x1000 to align to a unity gain output of 20.12.
 *
 * In between calls to filterCoefficient, the phase is incremented by phaseIncrement, where
 * phaseIncrement is calculated as inputSampling * phaseWrapLimit / outputSampling.
 *
 * The filter polyphase index is given by indexP = phase >> coefShift. Due to
 * odd length symmetric filter, the polyphase index of the negative half depends on
 * whether interpolation is used.
 *
 * The fractional siting between the polyphase indices is given by the bits below coefShift:
 *
 * lerpP = phase << 32 - coefShift >> 1;  // for 32 bit unsigned phase multiply
 * lerpP = phase << 32 - coefShift >> 17; // for 16 bit unsigned phase multiply
 *
 * For integer types, this is expressed as:
 *
 * lerpP = phase << sizeof(phase)*8 - coefShift
 *              >> (sizeof(phase)-sizeof(*coefs))*8 + 1;
 *
 * For floating point, lerpP is the fractional phase scaled to [0.0, 1.0):
 *
 * lerpP = (phase << 32 - coefShift) / (1 << 32); // floating point equivalent
 */

template<int CHANNELS, bool LOCKED, int STRIDE, typename TC, typename TI, typename TO>
static inline
void fir(TO* const out,
        const uint32_t phase, const uint32_t phaseWrapLimit,
        const int coefShift, const int halfNumCoefs, const TC* const coefs,
        const TI* const samples, const TO* const volumeLR)
{
    // NOTE: be very careful when modifying the code here. register
    // pressure is very high and a small change might cause the compiler
    // to generate far less efficient code.
    // Always sanity check the result with objdump or test-resample.

    if (LOCKED) {
        // locked polyphase (no interpolation)
        // Compute the polyphase filter index on the positive and negative side.
        uint32_t indexP = phase >> coefShift;
        uint32_t indexN = (phaseWrapLimit - phase) >> coefShift;
        const TC* coefsP = coefs + indexP*halfNumCoefs;
        const TC* coefsN = coefs + indexN*halfNumCoefs;
        const TI* sP = samples;
        const TI* sN = samples + CHANNELS;

        // dot product filter.
        ProcessL<CHANNELS, STRIDE>(out,
                halfNumCoefs, coefsP, coefsN, sP, sN, volumeLR);
    } else {
        // interpolated polyphase
        // Compute the polyphase filter index on the positive and negative side.
        uint32_t indexP = phase >> coefShift;
        uint32_t indexN = (phaseWrapLimit - phase - 1) >> coefShift; // one's complement.
        const TC* coefsP = coefs + indexP*halfNumCoefs;
        const TC* coefsN = coefs + indexN*halfNumCoefs;
        const TC* coefsP1 = coefsP + halfNumCoefs;
        const TC* coefsN1 = coefsN + halfNumCoefs;
        const TI* sP = samples;
        const TI* sN = samples + CHANNELS;

        // Interpolation fraction lerpP derived by shifting all the way up and down
        // to clear the appropriate bits and align to the appropriate level
        // for the integer multiply.  The constants should resolve in compile time.
        //
        // The interpolated filter coefficient is derived as follows for the pos/neg half:
        //
        // interpolated[P] = index[P]*lerpP + index[P+1]*(1-lerpP)
        // interpolated[N] = index[N+1]*lerpP + index[N]*(1-lerpP)

        // on-the-fly interpolated dot product filter
        if (is_same<TC, float>::value || is_same<TC, double>::value) {
            static const TC scale = 1. / (65536. * 65536.); // scale phase bits to [0.0, 1.0)
            TC lerpP = TC(phase << (sizeof(phase)*8 - coefShift)) * scale;

            Process<CHANNELS, STRIDE>(out,
                    halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR);
        } else {
            uint32_t lerpP = phase << (sizeof(phase)*8 - coefShift)
                    >> ((sizeof(phase)-sizeof(*coefs))*8 + 1);

            Process<CHANNELS, STRIDE>(out,
                    halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR);
        }
    }
}

}; // namespace android

#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H*/