include/media/AudioResamplerPublic.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177

/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_AUDIO_RESAMPLER_PUBLIC_H
#define ANDROID_AUDIO_RESAMPLER_PUBLIC_H

#include <stdint.h>
#include <math.h>

namespace android {

// AUDIO_RESAMPLER_DOWN_RATIO_MAX is the maximum ratio between the original
// audio sample rate and the target rate when downsampling,
// as permitted in the audio framework, e.g. AudioTrack and AudioFlinger.
// In practice, it is not recommended to downsample more than 6:1
// for best audio quality, even though the audio framework permits a larger
// downsampling ratio.
// TODO: replace with an API
#define AUDIO_RESAMPLER_DOWN_RATIO_MAX 256

// AUDIO_RESAMPLER_UP_RATIO_MAX is the maximum suggested ratio between the original
// audio sample rate and the target rate when upsampling.  It is loosely enforced by
// the system. One issue with large upsampling ratios is the approximation by
// an int32_t of the phase increments, making the resulting sample rate inexact.
#define AUDIO_RESAMPLER_UP_RATIO_MAX 65536

// AUDIO_TIMESTRETCH_SPEED_MIN and AUDIO_TIMESTRETCH_SPEED_MAX define the min and max time stretch
// speeds supported by the system. These are enforced by the system and values outside this range
// will result in a runtime error.
// Depending on the AudioPlaybackRate::mStretchMode, the effective limits might be narrower than
// the ones specified here
// AUDIO_TIMESTRETCH_SPEED_MIN_DELTA is the minimum absolute speed difference that might trigger a
// parameter update
#define AUDIO_TIMESTRETCH_SPEED_MIN    0.01f
#define AUDIO_TIMESTRETCH_SPEED_MAX    20.0f
#define AUDIO_TIMESTRETCH_SPEED_NORMAL 1.0f
#define AUDIO_TIMESTRETCH_SPEED_MIN_DELTA 0.0001f

// AUDIO_TIMESTRETCH_PITCH_MIN and AUDIO_TIMESTRETCH_PITCH_MAX define the min and max time stretch
// pitch shifting supported by the system. These are not enforced by the system and values
// outside this range might result in a pitch different than the one requested.
// Depending on the AudioPlaybackRate::mStretchMode, the effective limits might be narrower than
// the ones specified here.
// AUDIO_TIMESTRETCH_PITCH_MIN_DELTA is the minimum absolute pitch difference that might trigger a
// parameter update
#define AUDIO_TIMESTRETCH_PITCH_MIN    0.25f
#define AUDIO_TIMESTRETCH_PITCH_MAX    4.0f
#define AUDIO_TIMESTRETCH_PITCH_NORMAL 1.0f
#define AUDIO_TIMESTRETCH_PITCH_MIN_DELTA 0.0001f


//Determines the current algorithm used for stretching
enum AudioTimestretchStretchMode : int32_t {
    AUDIO_TIMESTRETCH_STRETCH_DEFAULT            = 0,
    AUDIO_TIMESTRETCH_STRETCH_SPEECH             = 1,
    //TODO: add more stretch modes/algorithms
};

//Limits for AUDIO_TIMESTRETCH_STRETCH_SPEECH mode
#define TIMESTRETCH_SONIC_SPEED_MIN 0.1f
#define TIMESTRETCH_SONIC_SPEED_MAX 6.0f

//Determines behavior of Timestretch if current algorithm can't perform
//with current parameters.
// FALLBACK_CUT_REPEAT: (internal only) for speed <1.0 will truncate frames
//    for speed > 1.0 will repeat frames
// FALLBACK_MUTE: will set all processed frames to zero
// FALLBACK_FAIL:  will stop program execution and log a fatal error
enum AudioTimestretchFallbackMode : int32_t {
    AUDIO_TIMESTRETCH_FALLBACK_CUT_REPEAT     = -1,
    AUDIO_TIMESTRETCH_FALLBACK_DEFAULT        = 0,
    AUDIO_TIMESTRETCH_FALLBACK_MUTE           = 1,
    AUDIO_TIMESTRETCH_FALLBACK_FAIL           = 2,
};

struct AudioPlaybackRate {
    float mSpeed;
    float mPitch;
    enum AudioTimestretchStretchMode  mStretchMode;
    enum AudioTimestretchFallbackMode mFallbackMode;
};

static const AudioPlaybackRate AUDIO_PLAYBACK_RATE_DEFAULT = {
        AUDIO_TIMESTRETCH_SPEED_NORMAL,
        AUDIO_TIMESTRETCH_PITCH_NORMAL,
        AUDIO_TIMESTRETCH_STRETCH_DEFAULT,
        AUDIO_TIMESTRETCH_FALLBACK_DEFAULT
};

static inline bool isAudioPlaybackRateEqual(const AudioPlaybackRate &pr1,
        const AudioPlaybackRate &pr2) {
    return fabs(pr1.mSpeed - pr2.mSpeed) < AUDIO_TIMESTRETCH_SPEED_MIN_DELTA &&
           fabs(pr1.mPitch - pr2.mPitch) < AUDIO_TIMESTRETCH_PITCH_MIN_DELTA &&
           pr2.mStretchMode == pr2.mStretchMode &&
           pr2.mFallbackMode == pr2.mFallbackMode;
}

static inline bool isAudioPlaybackRateValid(const AudioPlaybackRate &playbackRate) {
    if (playbackRate.mFallbackMode == AUDIO_TIMESTRETCH_FALLBACK_FAIL &&
            (playbackRate.mStretchMode == AUDIO_TIMESTRETCH_STRETCH_SPEECH ||
                    playbackRate.mStretchMode == AUDIO_TIMESTRETCH_STRETCH_DEFAULT)) {
        //test sonic specific constraints
        return playbackRate.mSpeed >= TIMESTRETCH_SONIC_SPEED_MIN &&
                playbackRate.mSpeed <= TIMESTRETCH_SONIC_SPEED_MAX &&
                playbackRate.mPitch >= AUDIO_TIMESTRETCH_PITCH_MIN &&
                playbackRate.mPitch <= AUDIO_TIMESTRETCH_PITCH_MAX;
    } else {
        return playbackRate.mSpeed >= AUDIO_TIMESTRETCH_SPEED_MIN &&
                playbackRate.mSpeed <= AUDIO_TIMESTRETCH_SPEED_MAX &&
                playbackRate.mPitch >= AUDIO_TIMESTRETCH_PITCH_MIN &&
                playbackRate.mPitch <= AUDIO_TIMESTRETCH_PITCH_MAX;
    }
}

// TODO: Consider putting these inlines into a class scope

// Returns the source frames needed to resample to destination frames.  This is not a precise
// value and depends on the resampler (and possibly how it handles rounding internally).
// Nevertheless, this should be an upper bound on the requirements of the resampler.
// If srcSampleRate and dstSampleRate are equal, then it returns destination frames, which
// may not be true if the resampler is asynchronous.
static inline size_t sourceFramesNeeded(
        uint32_t srcSampleRate, size_t dstFramesRequired, uint32_t dstSampleRate) {
    // +1 for rounding - always do this even if matched ratio (resampler may use phases not ratio)
    // +1 for additional sample needed for interpolation
    return srcSampleRate == dstSampleRate ? dstFramesRequired :
            size_t((uint64_t)dstFramesRequired * srcSampleRate / dstSampleRate + 1 + 1);
}

// An upper bound for the number of destination frames possible from srcFrames
// after sample rate conversion.  This may be used for buffer sizing.
static inline size_t destinationFramesPossible(size_t srcFrames, uint32_t srcSampleRate,
        uint32_t dstSampleRate) {
    if (srcSampleRate == dstSampleRate) {
        return srcFrames;
    }
    uint64_t dstFrames = (uint64_t)srcFrames * dstSampleRate / srcSampleRate;
    return dstFrames > 2 ? dstFrames - 2 : 0;
}

static inline size_t sourceFramesNeededWithTimestretch(
        uint32_t srcSampleRate, size_t dstFramesRequired, uint32_t dstSampleRate,
        float speed) {
    // required is the number of input frames the resampler needs
    size_t required = sourceFramesNeeded(srcSampleRate, dstFramesRequired, dstSampleRate);
    // to deliver this, the time stretcher requires:
    return required * (double)speed + 1 + 1; // accounting for rounding dependencies
}

// Identifies sample rates that we associate with music
// and thus eligible for better resampling and fast capture.
// This is somewhat less than 44100 to allow for pitch correction
// involving resampling as well as asynchronous resampling.
#define AUDIO_PROCESSING_MUSIC_RATE 40000

static inline bool isMusicRate(uint32_t sampleRate) {
    return sampleRate >= AUDIO_PROCESSING_MUSIC_RATE;
}

} // namespace android

// ---------------------------------------------------------------------------

#endif // ANDROID_AUDIO_RESAMPLER_PUBLIC_H