/* * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * A service that exchanges time synchronization information between * a master that defines a timeline and clients that follow the timeline. */ #define __STDC_LIMIT_MACROS #define LOG_TAG "common_time" #include #include #include #include #include "clock_recovery.h" #include "common_clock.h" #ifdef TIME_SERVICE_DEBUG #include "diag_thread.h" #endif // Define log macro so we can make LOGV into LOGE when we are exclusively // debugging this code. #ifdef TIME_SERVICE_DEBUG #define LOG_TS ALOGE #else #define LOG_TS ALOGV #endif namespace android { ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock, CommonClock* common_clock) { assert(NULL != local_clock); assert(NULL != common_clock); local_clock_ = local_clock; common_clock_ = common_clock; local_clock_can_slew_ = local_clock_->initCheck() && (local_clock_->setLocalSlew(0) == OK); reset(true, true); #ifdef TIME_SERVICE_DEBUG diag_thread_ = new DiagThread(common_clock_, local_clock_); if (diag_thread_ != NULL) { status_t res = diag_thread_->startWorkThread(); if (res != OK) ALOGW("Failed to start A@H clock recovery diagnostic thread."); } else ALOGW("Failed to allocate diagnostic thread."); #endif } ClockRecoveryLoop::~ClockRecoveryLoop() { #ifdef TIME_SERVICE_DEBUG diag_thread_->stopWorkThread(); #endif } // Constants. const float ClockRecoveryLoop::dT = 1.0; const float ClockRecoveryLoop::Kc = 1.0f; const float ClockRecoveryLoop::Ti = 15.0f; const float ClockRecoveryLoop::Tf = 0.05; const float ClockRecoveryLoop::bias_Fc = 0.01; const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc)); const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT)); const int64_t ClockRecoveryLoop::panic_thresh_ = 50000; const int64_t ClockRecoveryLoop::control_thresh_ = 10000; const float ClockRecoveryLoop::COmin = -100.0f; const float ClockRecoveryLoop::COmax = 100.0f; void ClockRecoveryLoop::reset(bool position, bool frequency) { Mutex::Autolock lock(&lock_); reset_l(position, frequency); } uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data, uint32_t count) { uint32_t min_rtt = 0; for (uint32_t i = 1; i < count; ++i) if (data[min_rtt].rtt > data[i].rtt) min_rtt = i; return min_rtt; } bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time, int64_t nominal_common_time, int64_t rtt) { Mutex::Autolock lock(&lock_); int64_t local_common_time = 0; common_clock_->localToCommon(local_time, &local_common_time); int64_t raw_delta = nominal_common_time - local_common_time; #ifdef TIME_SERVICE_DEBUG ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n", local_common_time, nominal_common_time, raw_delta, rtt); #endif // If we have not defined a basis for common time, then we need to use these // initial points to do so. In order to avoid significant initial error // from a particularly bad startup data point, we collect the first N data // points and choose the best of them before moving on. if (!common_clock_->isValid()) { if (startup_filter_wr_ < kStartupFilterSize) { DisciplineDataPoint& d = startup_filter_data_[startup_filter_wr_]; d.local_time = local_time; d.nominal_common_time = nominal_common_time; d.rtt = rtt; startup_filter_wr_++; } if (startup_filter_wr_ == kStartupFilterSize) { uint32_t min_rtt = findMinRTTNdx(startup_filter_data_, kStartupFilterSize); common_clock_->setBasis( startup_filter_data_[min_rtt].local_time, startup_filter_data_[min_rtt].nominal_common_time); } return true; } int64_t observed_common; int64_t delta; float delta_f, dCO; int32_t correction_cur; if (OK != common_clock_->localToCommon(local_time, &observed_common)) { // Since we just checked to make certain that this conversion was valid, // and no one else in the system should be messing with it, if this // conversion is suddenly invalid, it is a good reason to panic. ALOGE("Failed to convert local time to common time in %s:%d", __PRETTY_FUNCTION__, __LINE__); return false; } // Implement a filter which should match NTP filtering behavior when a // client is associated with only one peer of lower stratum. Basically, // always use the best of the N last data points, where best is defined as // lowest round trip time. NTP uses an N of 8; we use a value of 6. // // TODO(johngro) : experiment with other filter strategies. The goal here // is to mitigate the effects of high RTT data points which typically have // large asymmetries in the TX/RX legs. Downside of the existing NTP // approach (particularly because of the PID controller we are using to // produce the control signal from the filtered data) are that the rate at // which discipline events are actually acted upon becomes irregular and can // become drawn out (the time between actionable event can go way up). If // the system receives a strong high quality data point, the proportional // component of the controller can produce a strong correction which is left // in place for too long causing overshoot. In addition, the integral // component of the system currently is an approximation based on the // assumption of a more or less homogeneous sampling of the error. Its // unclear what the effect of undermining this assumption would be right // now. // Two ideas which come to mind immediately would be to... // 1) Keep a history of more data points (32 or so) and ignore data points // whose RTT is more than a certain number of standard deviations outside // of the norm. // 2) Eliminate the PID controller portion of this system entirely. // Instead, move to a system which uses a very wide filter (128 data // points or more) with a sum-of-least-squares line fitting approach to // tracking the long term drift. This would take the place of the I // component in the current PID controller. Also use a much more narrow // outlier-rejector filter (as described in #1) to drive a short term // correction factor similar to the P component of the PID controller. assert(filter_wr_ < kFilterSize); filter_data_[filter_wr_].local_time = local_time; filter_data_[filter_wr_].observed_common_time = observed_common; filter_data_[filter_wr_].nominal_common_time = nominal_common_time; filter_data_[filter_wr_].rtt = rtt; filter_data_[filter_wr_].point_used = false; uint32_t current_point = filter_wr_; filter_wr_ = (filter_wr_ + 1) % kFilterSize; if (!filter_wr_) filter_full_ = true; uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_; uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end); // We only use packets with low RTTs for control. If the packet RTT // is less than the panic threshold, we can probably eat the jitter with the // control loop. Otherwise, take the packet only if it better than all // of the packets we have in the history. That way we try to track // something, even if it is noisy. if (current_point == min_rtt || rtt < control_thresh_) { delta_f = delta = nominal_common_time - observed_common; // Compute the error then clamp to the panic threshold. If we ever // exceed this amt of error, its time to panic and reset the system. // Given that the error in the measurement of the error could be as // high as the RTT of the data point, we don't actually panic until // the implied error (delta) is greater than the absolute panic // threashold plus the RTT. IOW - we don't panic until we are // absoluely sure that our best case sync is worse than the absolute // panic threshold. int64_t effective_panic_thresh = panic_thresh_ + rtt; if ((delta > effective_panic_thresh) || (delta < -effective_panic_thresh)) { // PANIC!!! reset_l(false, true); return false; } } else { // We do not have a good packet to look at, but we also do not want to // free-run the clock at some crazy slew rate. So we guess the // trajectory of the clock based on the last controller output and the // estimated bias of our clock against the master. // The net effect of this is that CO == CObias after some extended // period of no feedback. delta_f = last_delta_f_ - dT*(CO - CObias); delta = delta_f; } // Velocity form PI control equation. dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_; CO += dCO * Tf; // Filter CO by applying gain <1 here. // Save error terms for later. last_delta_f_ = delta_f; last_delta_ = delta; // Clamp CO to +/- 100ppm. if (CO < COmin) CO = COmin; else if (CO > COmax) CO = COmax; // Update the controller bias. CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias; lastCObias = CObias; // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we // don't get fp weirdness. correction_cur = CO * 327.66; // If there was a change in the amt of correction to use, update the // system. if (correction_cur_ != correction_cur) { correction_cur_ = correction_cur; applySlew(); } LOG_TS("clock_loop %lld %f %f %f %d\n", raw_delta, delta_f, CO, CObias, correction_cur); #ifdef TIME_SERVICE_DEBUG diag_thread_->pushDisciplineEvent( local_time, observed_common, nominal_common_time, correction_cur, rtt); #endif return true; } int32_t ClockRecoveryLoop::getLastErrorEstimate() { Mutex::Autolock lock(&lock_); if (last_delta_valid_) return last_delta_; else return ICommonClock::kErrorEstimateUnknown; } void ClockRecoveryLoop::reset_l(bool position, bool frequency) { assert(NULL != common_clock_); if (position) { common_clock_->resetBasis(); startup_filter_wr_ = 0; } if (frequency) { last_delta_valid_ = false; last_delta_ = 0; last_delta_f_ = 0.0; correction_cur_ = 0x0; CO = 0.0f; lastCObias = CObias = 0.0f; applySlew(); } filter_wr_ = 0; filter_full_ = false; } void ClockRecoveryLoop::applySlew() { if (local_clock_can_slew_) { local_clock_->setLocalSlew(correction_cur_); } else { // The SW clock recovery implemented by the common clock class expects // values expressed in PPM. CO is in ppm. common_clock_->setSlew(local_clock_->getLocalTime(), CO); } } } // namespace android