summaryrefslogtreecommitdiffstats
path: root/libs/common_time/clock_recovery.cpp
blob: 3a7c70c3e17d7b58327e4e424d83444d6c5c81bf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * A service that exchanges time synchronization information between
 * a master that defines a timeline and clients that follow the timeline.
 */

#define __STDC_LIMIT_MACROS
#define LOG_TAG "common_time"
#include <utils/Log.h>
#include <stdint.h>

#include <common_time/local_clock.h>
#include <assert.h>

#include "clock_recovery.h"
#include "common_clock.h"
#ifdef TIME_SERVICE_DEBUG
#include "diag_thread.h"
#endif

// Define log macro so we can make LOGV into LOGE when we are exclusively
// debugging this code.
#ifdef TIME_SERVICE_DEBUG
#define LOG_TS ALOGE
#else
#define LOG_TS ALOGV
#endif

namespace android {

ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
                                     CommonClock* common_clock) {
    assert(NULL != local_clock);
    assert(NULL != common_clock);

    local_clock_  = local_clock;
    common_clock_ = common_clock;

    local_clock_can_slew_ = local_clock_->initCheck() &&
                           (local_clock_->setLocalSlew(0) == OK);
    tgt_correction_ = 0;
    cur_correction_ = 0;

    // Precompute the max rate at which we are allowed to change the VCXO
    // control.
    uint64_t N = 0x10000ull * 1000ull;
    uint64_t D = local_clock_->getLocalFreq() * kMinFullRangeSlewChange_mSec;
    LinearTransform::reduce(&N, &D);
    while ((N > INT32_MAX) || (D > UINT32_MAX)) {
        N >>= 1;
        D >>= 1;
        LinearTransform::reduce(&N, &D);
    }
    time_to_cur_slew_.a_to_b_numer = static_cast<int32_t>(N);
    time_to_cur_slew_.a_to_b_denom = static_cast<uint32_t>(D);

    reset(true, true);

#ifdef TIME_SERVICE_DEBUG
    diag_thread_ = new DiagThread(common_clock_, local_clock_);
    if (diag_thread_ != NULL) {
        status_t res = diag_thread_->startWorkThread();
        if (res != OK)
            ALOGW("Failed to start A@H clock recovery diagnostic thread.");
    } else
        ALOGW("Failed to allocate diagnostic thread.");
#endif
}

ClockRecoveryLoop::~ClockRecoveryLoop() {
#ifdef TIME_SERVICE_DEBUG
    diag_thread_->stopWorkThread();
#endif
}

// Constants.
const float ClockRecoveryLoop::dT = 1.0;
const float ClockRecoveryLoop::Kc = 1.0f;
const float ClockRecoveryLoop::Ti = 15.0f;
const float ClockRecoveryLoop::Tf = 0.05;
const float ClockRecoveryLoop::bias_Fc = 0.01;
const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc));
const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT));
const int64_t ClockRecoveryLoop::panic_thresh_ = 50000;
const int64_t ClockRecoveryLoop::control_thresh_ = 10000;
const float ClockRecoveryLoop::COmin = -100.0f;
const float ClockRecoveryLoop::COmax = 100.0f;
const uint32_t ClockRecoveryLoop::kMinFullRangeSlewChange_mSec = 300;
const int ClockRecoveryLoop::kSlewChangeStepPeriod_mSec = 10;


void ClockRecoveryLoop::reset(bool position, bool frequency) {
    Mutex::Autolock lock(&lock_);
    reset_l(position, frequency);
}

uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
                                          uint32_t count) {
    uint32_t min_rtt = 0;
    for (uint32_t i = 1; i < count; ++i)
        if (data[min_rtt].rtt > data[i].rtt)
            min_rtt = i;

    return min_rtt;
}

bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
                                            int64_t nominal_common_time,
                                            int64_t rtt) {
    Mutex::Autolock lock(&lock_);

    int64_t local_common_time = 0;
    common_clock_->localToCommon(local_time, &local_common_time);
    int64_t raw_delta = nominal_common_time - local_common_time;

#ifdef TIME_SERVICE_DEBUG
    ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n",
         local_common_time, nominal_common_time,
         raw_delta, rtt);
#endif

    // If we have not defined a basis for common time, then we need to use these
    // initial points to do so.  In order to avoid significant initial error
    // from a particularly bad startup data point, we collect the first N data
    // points and choose the best of them before moving on.
    if (!common_clock_->isValid()) {
        if (startup_filter_wr_ < kStartupFilterSize) {
            DisciplineDataPoint& d =  startup_filter_data_[startup_filter_wr_];
            d.local_time = local_time;
            d.nominal_common_time = nominal_common_time;
            d.rtt = rtt;
            startup_filter_wr_++;
        }

        if (startup_filter_wr_ == kStartupFilterSize) {
            uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
                    kStartupFilterSize);

            common_clock_->setBasis(
                    startup_filter_data_[min_rtt].local_time,
                    startup_filter_data_[min_rtt].nominal_common_time);
        }

        return true;
    }

    int64_t observed_common;
    int64_t delta;
    float delta_f, dCO;
    int32_t tgt_correction;

    if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
        // Since we just checked to make certain that this conversion was valid,
        // and no one else in the system should be messing with it, if this
        // conversion is suddenly invalid, it is a good reason to panic.
        ALOGE("Failed to convert local time to common time in %s:%d",
                __PRETTY_FUNCTION__, __LINE__);
        return false;
    }

    // Implement a filter which should match NTP filtering behavior when a
    // client is associated with only one peer of lower stratum.  Basically,
    // always use the best of the N last data points, where best is defined as
    // lowest round trip time.  NTP uses an N of 8; we use a value of 6.
    //
    // TODO(johngro) : experiment with other filter strategies.  The goal here
    // is to mitigate the effects of high RTT data points which typically have
    // large asymmetries in the TX/RX legs.  Downside of the existing NTP
    // approach (particularly because of the PID controller we are using to
    // produce the control signal from the filtered data) are that the rate at
    // which discipline events are actually acted upon becomes irregular and can
    // become drawn out (the time between actionable event can go way up).  If
    // the system receives a strong high quality data point, the proportional
    // component of the controller can produce a strong correction which is left
    // in place for too long causing overshoot.  In addition, the integral
    // component of the system currently is an approximation based on the
    // assumption of a more or less homogeneous sampling of the error.  Its
    // unclear what the effect of undermining this assumption would be right
    // now.

    // Two ideas which come to mind immediately would be to...
    // 1) Keep a history of more data points (32 or so) and ignore data points
    //    whose RTT is more than a certain number of standard deviations outside
    //    of the norm.
    // 2) Eliminate the PID controller portion of this system entirely.
    //    Instead, move to a system which uses a very wide filter (128 data
    //    points or more) with a sum-of-least-squares line fitting approach to
    //    tracking the long term drift.  This would take the place of the I
    //    component in the current PID controller.  Also use a much more narrow
    //    outlier-rejector filter (as described in #1) to drive a short term
    //    correction factor similar to the P component of the PID controller.
    assert(filter_wr_ < kFilterSize);
    filter_data_[filter_wr_].local_time           = local_time;
    filter_data_[filter_wr_].observed_common_time = observed_common;
    filter_data_[filter_wr_].nominal_common_time  = nominal_common_time;
    filter_data_[filter_wr_].rtt                  = rtt;
    filter_data_[filter_wr_].point_used           = false;
    uint32_t current_point = filter_wr_;
    filter_wr_ = (filter_wr_ + 1) % kFilterSize;
    if (!filter_wr_)
        filter_full_ = true;

    uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
    uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
    // We only use packets with low RTTs for control. If the packet RTT
    // is less than the panic threshold, we can probably eat the jitter with the
    // control loop. Otherwise, take the packet only if it better than all
    // of the packets we have in the history. That way we try to track
    // something, even if it is noisy.
    if (current_point == min_rtt || rtt < control_thresh_) {
        delta_f = delta = nominal_common_time - observed_common;

        last_error_est_valid_ = true;
        last_error_est_usec_ = delta;

        // Compute the error then clamp to the panic threshold.  If we ever
        // exceed this amt of error, its time to panic and reset the system.
        // Given that the error in the measurement of the error could be as
        // high as the RTT of the data point, we don't actually panic until
        // the implied error (delta) is greater than the absolute panic
        // threashold plus the RTT.  IOW - we don't panic until we are
        // absoluely sure that our best case sync is worse than the absolute
        // panic threshold.
        int64_t effective_panic_thresh = panic_thresh_ + rtt;
        if ((delta > effective_panic_thresh) ||
            (delta < -effective_panic_thresh)) {
            // PANIC!!!
            reset_l(false, true);
            return false;
        }

    } else {
        // We do not have a good packet to look at, but we also do not want to
        // free-run the clock at some crazy slew rate. So we guess the
        // trajectory of the clock based on the last controller output and the
        // estimated bias of our clock against the master.
        // The net effect of this is that CO == CObias after some extended
        // period of no feedback.
        delta_f = last_delta_f_ - dT*(CO - CObias);
        delta = delta_f;
    }

    // Velocity form PI control equation.
    dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_;
    CO += dCO * Tf; // Filter CO by applying gain <1 here.

    // Save error terms for later.
    last_delta_f_ = delta_f;

    // Clamp CO to +/- 100ppm.
    if (CO < COmin)
        CO = COmin;
    else if (CO > COmax)
        CO = COmax;

    // Update the controller bias.
    CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias;
    lastCObias = CObias;

    // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we
    // don't get fp weirdness.
    tgt_correction = CO * 327.66;

    // If there was a change in the amt of correction to use, update the
    // system.
    setTargetCorrection_l(tgt_correction);

    LOG_TS("clock_loop %lld %f %f %f %d\n", raw_delta, delta_f, CO, CObias, tgt_correction);

#ifdef TIME_SERVICE_DEBUG
    diag_thread_->pushDisciplineEvent(
            local_time,
            observed_common,
            nominal_common_time,
            tgt_correction,
            rtt);
#endif

    return true;
}

int32_t ClockRecoveryLoop::getLastErrorEstimate() {
    Mutex::Autolock lock(&lock_);

    if (last_error_est_valid_)
        return last_error_est_usec_;
    else
        return ICommonClock::kErrorEstimateUnknown;
}

void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
    assert(NULL != common_clock_);

    if (position) {
        common_clock_->resetBasis();
        startup_filter_wr_ = 0;
    }

    if (frequency) {
        last_error_est_valid_ = false;
        last_error_est_usec_ = 0;
        last_delta_f_ = 0.0;
        CO = 0.0f;
        lastCObias = CObias = 0.0f;
        setTargetCorrection_l(0);
        applySlew_l();
    }

    filter_wr_   = 0;
    filter_full_ = false;
}

void ClockRecoveryLoop::setTargetCorrection_l(int32_t tgt) {
    // When we make a change to the slew rate, we need to be careful to not
    // change it too quickly as it can anger some HDMI sinks out there, notably
    // some Sony panels from the 2010-2011 timeframe.  From experimenting with
    // some of these sinks, it seems like swinging from one end of the range to
    // another in less that 190mSec or so can start to cause trouble.  Adding in
    // a hefty margin, we limit the system to a full range sweep in no less than
    // 300mSec.
    if (tgt_correction_ != tgt) {
        int64_t now = local_clock_->getLocalTime();
        status_t res;

        tgt_correction_ = tgt;

        // Set up the transformation to figure out what the slew should be at
        // any given point in time in the future.
        time_to_cur_slew_.a_zero = now;
        time_to_cur_slew_.b_zero = cur_correction_;

        // Make sure the sign of the slope is headed in the proper direction.
        bool needs_increase = (cur_correction_ < tgt_correction_);
        bool is_increasing  = (time_to_cur_slew_.a_to_b_numer > 0);
        if (( needs_increase && !is_increasing) ||
            (!needs_increase &&  is_increasing)) {
            time_to_cur_slew_.a_to_b_numer = -time_to_cur_slew_.a_to_b_numer;
        }

        // Finally, figure out when the change will be finished and start the
        // slew operation.
        time_to_cur_slew_.doReverseTransform(tgt_correction_,
                                             &slew_change_end_time_);

        applySlew_l();
    }
}

bool ClockRecoveryLoop::applySlew_l() {
    bool ret = true;

    // If cur == tgt, there is no ongoing sleq rate change and we are already
    // finished.
    if (cur_correction_ == tgt_correction_)
        goto bailout;

    if (local_clock_can_slew_) {
        int64_t now = local_clock_->getLocalTime();
        int64_t tmp;

        if (now >= slew_change_end_time_) {
            cur_correction_ = tgt_correction_;
            next_slew_change_timeout_.setTimeout(-1);
        } else {
            time_to_cur_slew_.doForwardTransform(now, &tmp);

            if (tmp > INT16_MAX)
                cur_correction_ = INT16_MAX;
            else if (tmp < INT16_MIN)
                cur_correction_ = INT16_MIN;
            else
                cur_correction_ = static_cast<int16_t>(tmp);

            next_slew_change_timeout_.setTimeout(kSlewChangeStepPeriod_mSec);
            ret = false;
        }

        local_clock_->setLocalSlew(cur_correction_);
    } else {
        // Since we are not actually changing the rate of a HW clock, we don't
        // need to worry to much about changing the slew rate so fast that we
        // anger any downstream HDMI devices.
        cur_correction_ = tgt_correction_;
        next_slew_change_timeout_.setTimeout(-1);

        // The SW clock recovery implemented by the common clock class expects
        // values expressed in PPM. CO is in ppm.
        common_clock_->setSlew(local_clock_->getLocalTime(), CO);
    }

bailout:
    return ret;
}

int ClockRecoveryLoop::applyRateLimitedSlew() {
    Mutex::Autolock lock(&lock_);

    int ret = next_slew_change_timeout_.msecTillTimeout();
    if (!ret) {
        if (applySlew_l())
            next_slew_change_timeout_.setTimeout(-1);
        ret = next_slew_change_timeout_.msecTillTimeout();
    }

    return ret;
}

}  // namespace android