WebRtc Video Receiver(八)-基于Kalma

2021-06-13  本文已影响0人  JeffreyLau

1)前言

2)Arrival-time model模型介绍

image-20210610215218805.png
d(i) = t(i) - t(i-1) - (T(i) - T(i-1))                                  (2.1.1)
     = L(i)/C(i) - L(i-1)/C(i-1) + w(i)
        L(i)-L(i-1)
     = -------------- + w(i)
           C(i)
     = dL(i)/C(i) + w(i)
w(i) = m(i) + v(i)
d(i) = dL(i)/C(i) + m(i) + v(i)

3)Kalman filter 建模及理论推导

3.1 卡尔曼滤波-建立状态空间方程

 theta(i) = A * theta(i-1) + u(i-1)                                     P(u) ~ (0,Q)         
          = theta(i-1) + u(i-1)                                          (3.1.1)
 Q(i) = E{u_bar(i) * u_bar(i)^T}
 diag(Q(i)) = [10^-13 10^-3]^T
theta_bar(i) = [1/C(i)  m(i)]^T   

3.2 卡尔曼滤波-建立观测方程

d(i) = H * theta(i) + v(i)                  P(V) ~ (0,R)                    (3.2.2)
h_bar(i) = [dL(i)  1]^T
H = h_bar(i)^T = [dL(i)  1]    
d(i) = h_bar(i)^T * theta_bar(i) + v(i)                                     (3.2.3)
variance var_v = sigma(v,i)^2
R(i) = E{v_bar(i) * v_bar(i)^T}  
     = var_v
d(i) = h_bar(i)^T * theta_bar(i) + v(i)
     = [dL(i)  1] * [1/C(i)  m(i)]^T + v(i)                                 (3.2.4)

3.3 卡尔曼滤波-预测计算先验估计

theta_hat^-(i) = theta_hat(i-1) + u(i-1)                                    (3.3.1)

3.4 卡尔曼滤波-预测计算先验估计误差协方差

e^-(i) = theta(i) - theta_hat^-(i)                      P(E(i)) ~ (0 , P)   (3.4.1)
P^-(i) = {e^-(i) * e^-(i)^T}
       = E{(theta(i) - theta_hat^-(i)) * (theta(i) - theta_hat^-(i))^T}     (3.4.2)
       = A * P(i-1) * A^T + Q                                               (3.4.3)
       = P(i-1) + Q 
       = E(i-1) + Q                                                         (3.4.4)

3.5 卡尔曼滤波-校正计算卡尔曼增益

                       P^-(i) * H^T
     k_bar(i) = ------------------------------------------------------
                    H * P^-(i) * H^T + R                   
              
                       P^-(i) * h_bar(i)
              = ------------------------------------------------------              (3.5.1)
                    h_bar(i)^T * P^-(i) * h_bar(i) + R                   
                     
                     ( E(i-1) + Q(i) ) * h_bar(i)
              = ------------------------------------------------------              (3.5.2)
                var_v_hat(i) + h_bar(i)^T * (E(i-1) + Q(i)) * h_bar(i)
The variance var_v(i) = sigma_v(i)^2 is estimated using an exponential averaging filter, modified for variable sampling rate
var_v_hat(i) = max(beta * var_v_hat(i-1) + (1-beta) * z(i)^2, 1)                    (3.5.3)
beta = (1-chi)^(30/(1000 * f_max))                                                  (3.5.4)

3.6 卡尔曼滤波-校正计算后验估计值

theta_hat(i) = theta_hat^-(i) + k_bar(i) * (d(i) - H * theta_hat^-(i))
             = theta_hat(i-1) + k_bar(i) * (d(i) - H * theta_hat(i-1))              (3.6.1)
             = theta_hat(i-1) + k_bar(i) * d(i) - k_bar(i) * H * theta_hat(i-1)     
             = (1 - k_bar(i) * H) * theta_hat(i-1)  + k_bar(i) * d(i)               (3.6.2)
k_bar(i) ~ [0 ~ 1/H]
z(i) = d(i) - h_bar(i)^T * theta_hat(i-1)                                           (3.6.3)
theta_hat(i) = theta_hat(i-1) + z(i) * k_bar(i)                                     (3.6.4)

3.7 卡尔曼滤波-更新误差协方差

e(i) = theta(i) - theta_hat(i)                      P(E(i)) ~ (0 , P)               (3.7.1)
P(i) = E{e(i) * e(i)^T}                                                             (3.7.2)
     = E{(theta(i) - theta_hat(i)) * (theta(i) - theta_hat(i))^T}                   (3.7.3)
     = E(i)
 P(i) = (I - k_bar(i) * H) * P^-(i)                                                 (3.7.4)
      = (I - k_bar(i) * h_bar(i)^T) * (E(i-1) + Q(i))                               (3.7.5)
      = E(i)

3.8 卡尔曼滤波-系统模型图

m(i) = (1 − K(i)) * m(i−1) + K(i) * (dm(i))

4)WebRTC中JitterDelay的计算和迭代过程

image-20210612222555068.png
bool VCMInterFrameDelay::CalculateDelay(uint32_t timestamp,
                                        int64_t* delay,
                                        int64_t currentWallClock) {
  .....
  // Compute the compensated timestamp difference and convert it to ms and round
  // it to closest integer.
  _dTS = static_cast<int64_t>(
      (timestamp + wrapAroundsSincePrev * (static_cast<int64_t>(1) << 32) -
       _prevTimestamp) /
          90.0 +
      0.5);\
  // frameDelay is the difference of dT and dTS -- i.e. the difference of the
  // wall clock time difference and the timestamp difference between two
  // following frames.
  *delay = static_cast<int64_t>(currentWallClock - _prevWallClock - _dTS);

  _prevTimestamp = timestamp;
  _prevWallClock = currentWallClock;
  return true;
}

4.1)计算JitterDelay

int VCMJitterEstimator::GetJitterEstimate(
    double rttMultiplier,
    absl::optional<double> rttMultAddCapMs) {
  //调用CalculateEstimate()计算当前的jitterDelay,OPERATING_SYSTEM_JITTER默认为10ms  
  //这就意味着默认最小的jittterDelay至少是10ms?     
  double jitterMS = CalculateEstimate() + OPERATING_SYSTEM_JITTER;
  uint64_t now = clock_->TimeInMicroseconds();
  //kNackCountTimeoutMs = 60000
  // FrameNacked会更新_latestNackTimestamp单位为微秒  
  //1分钟内若所有帧都未丢包则清除  
  if (now - _latestNackTimestamp > kNackCountTimeoutMs * 1000)
    _nackCount = 0;

  if (_filterJitterEstimate > jitterMS)
    jitterMS = _filterJitterEstimate;
  if (_nackCount >= _nackLimit) {//_nackLimit
    if (rttMultAddCapMs.has_value()) {
      jitterMS +=
          std::min(_rttFilter.RttMs() * rttMultiplier, rttMultAddCapMs.value());
    } else {
      jitterMS += _rttFilter.RttMs() * rttMultiplier;
    }
  }
  ....
  return rtc::checked_cast<int>(std::max(0.0, jitterMS) + 0.5);
}

// Calculates the current jitter estimate from the filtered estimates.
double VCMJitterEstimator::CalculateEstimate() {
  double ret = _theta[0] * (_maxFrameSize - _avgFrameSize) + NoiseThreshold();
  .......
  _prevEstimate = ret;
  return ret;
}
double VCMJitterEstimator::NoiseThreshold() const {
  double noiseThreshold = _noiseStdDevs * sqrt(_varNoise) - _noiseStdDevOffset;
  if (noiseThreshold < 1.0) {
    noiseThreshold = 1.0;
  }
  return noiseThreshold;
}

4.2)JitterDelay迭代更新机制

// Updates the estimates with the new measurements.
void VCMJitterEstimator::UpdateEstimate(int64_t frameDelayMS,
                                        uint32_t frameSizeBytes,
                                        bool incompleteFrame /* = false */) {
  //1)计算当前帧和上一帧的数据量之差
  int deltaFS = frameSizeBytes - _prevFrameSize;
  //2)计算_avgFrameSize平均每帧数据的大小 
  if (_fsCount < kFsAccuStartupSamples) {
    _fsSum += frameSizeBytes;
    _fsCount++;
  } else if (_fsCount == kFsAccuStartupSamples) {//kFsAccuStartupSamples取值为5超过5帧开始计算平均帧大小
    // Give the frame size filter.
    _avgFrameSize = static_cast<double>(_fsSum) / static_cast<double>(_fsCount);
    _fsCount++;
  }
  /*3)若当前输入帧的大小比平均每帧数据的数据量要大,则对其进行滑动平均处理,比如说如果当前是一个I帧,数据量显然会比较大,
    默认incompleteFrame为false,所以每帧都会计算平均值*/
  if (!incompleteFrame || frameSizeBytes > _avgFrameSize) {
    //滑动平均算法,_phi的取值为0.97,取接近前30帧数据大小的平均值,求得的avgFrameSize值为接近近30帧数据的平均大小  
    double avgFrameSize = _phi * _avgFrameSize + (1 - _phi) * frameSizeBytes;
    //如果I帧数据量会比较大,如下的判断会不成立,偏移太大不计赋值_avgFrameSize   
    if (frameSizeBytes < _avgFrameSize + 2 * sqrt(_varFrameSize)) {
      // Only update the average frame size if this sample wasn't a key frame.
      _avgFrameSize = avgFrameSize;
    }
    // Update the variance anyway since we want to capture cases where we only
    // get key frames.
    //3.1)此处更新平均帧大下的方差,默认方差为100,取其最大值,根据_varFrameSize可以得出在传输过程中每帧数据大小的均匀性
    //    若方差较大则说明帧的大小偏离平均帧大小的程度越大,则均匀性也越差
    _varFrameSize = VCM_MAX(
        _phi * _varFrameSize + (1 - _phi) * (frameSizeBytes - avgFrameSize) *
                                   (frameSizeBytes - avgFrameSize),
        1.0);
  }

  // Update max frameSize estimate.
  //4)计算最大帧数据量  
  _maxFrameSize =
      VCM_MAX(_psi * _maxFrameSize, static_cast<double>(frameSizeBytes));

  if (_prevFrameSize == 0) {
    _prevFrameSize = frameSizeBytes;
    return;
  }
  //赋值上一帧数据大小  
  _prevFrameSize = frameSizeBytes;

  // Cap frameDelayMS based on the current time deviation noise.
  /*5) 根据当前时间偏移噪声求frameDelayMS,_varNoise为噪声方差,默认4.0很显然该值在传输过程中会变化,
     time_deviation_upper_bound_为时间偏移上限值,默认为3.5,所以默认初始值计算出来max_time_deviation_ms
     为7,对于帧率越高,默认输入的frameDelayMS会越小,这里和max_time_deviation_ms去最小值,当噪声的方差越大
     max_time_deviation_ms的值月越大,其取值就会越接近取向传入的frameDelayMS*/ 
  int64_t max_time_deviation_ms =
      static_cast<int64_t>(time_deviation_upper_bound_ * sqrt(_varNoise) + 0.5);
  frameDelayMS = std::max(std::min(frameDelayMS, max_time_deviation_ms),
                          -max_time_deviation_ms);

  /*6)根据得出的延迟时间计算样本与卡尔曼滤波器估计的期望延迟之间的延迟差(反映网络噪声的大小),计算公式为
     frameDelayMS - (_theta[0] * deltaFSBytes + _theta[1])
     当前测量值 - 上一次卡尔曼滤波后的估计值 对应公式3.6.3和3.6.4*/ 
  double deviation = DeviationFromExpectedDelay(frameDelayMS, deltaFS);
  // Only update the Kalman filter if the sample is not considered an extreme
  // outlier. Even if it is an extreme outlier from a delay point of view, if
  // the frame size also is large the deviation is probably due to an incorrect
  // line slope.    
  //根据注释的意思是只有当样本不被认为是极端异常值时才更新卡尔曼滤波器,言外之意就是网络残差值不能超过
  //  _numStdDevDelayOutlier * sqrt(_varNoise) = 30ms 默认值,随_varNoise的大小变化而变化 
  if (fabs(deviation) < _numStdDevDelayOutlier * sqrt(_varNoise) ||
      frameSizeBytes >
          _avgFrameSize + _numStdDevFrameSizeOutlier * sqrt(_varFrameSize)) {
    // Update the variance of the deviation from the line given by the Kalman
    // filter.
    EstimateRandomJitter(deviation, incompleteFrame);
    // Prevent updating with frames which have been congested by a large frame,
    // and therefore arrives almost at the same time as that frame.
    // This can occur when we receive a large frame (key frame) which has been
    // delayed. The next frame is of normal size (delta frame), and thus deltaFS
    // will be << 0. This removes all frame samples which arrives after a key
    // frame.
    if ((!incompleteFrame || deviation >= 0.0) &&
        static_cast<double>(deltaFS) > -0.25 * _maxFrameSize) {
      // Update the Kalman filter with the new data
      KalmanEstimateChannel(frameDelayMS, deltaFS);
    }
  } else {  // 如果网络残差太大,说明噪声偏移太大,需要对测量噪声进行校正,本次不进行卡尔曼预测和校正  
    int nStdDev =
        (deviation >= 0) ? _numStdDevDelayOutlier : -_numStdDevDelayOutlier;
    EstimateRandomJitter(nStdDev * sqrt(_varNoise), incompleteFrame);
  }
  // Post process the total estimated jitter
  //6) 求得当前帧的jitterDelay最优估计值 
  if (_startupCount >= kStartupDelaySamples) {
    PostProcessEstimate();
  } else {
    _startupCount++;
  }
}
double VCMJitterEstimator::DeviationFromExpectedDelay(
    int64_t frameDelayMS,
    int32_t deltaFSBytes) const {
  return frameDelayMS - (_theta[0] * deltaFSBytes + _theta[1]); 
}
double avgFrameSize = _phi * _avgFrameSize + (1 - _phi) * frameSizeBytes;
_varFrameSize = VCM_MAX(
    _phi * _varFrameSize + (1 - _phi) * (frameSizeBytes - avgFrameSize) *
    (frameSizeBytes - avgFrameSize),1.0);

4.3)更新误差方差

void VCMJitterEstimator::EstimateRandomJitter(double d_dT,
                                              bool incompleteFrame) {
  uint64_t now = clock_->TimeInMicroseconds();
  //1)对帧率进行采样统计  
  if (_lastUpdateT != -1) {
    fps_counter_.AddSample(now - _lastUpdateT);
  }
  _lastUpdateT = now;

  if (_alphaCount == 0) {
    assert(false);
    return;
  }
  //2) alpha = 399  
  double alpha =
      static_cast<double>(_alphaCount - 1) / static_cast<double>(_alphaCount);
  _alphaCount++;
  if (_alphaCount > _alphaCountMax)
    _alphaCount = _alphaCountMax;//_alphaCountMax = 400

  // In order to avoid a low frame rate stream to react slower to changes,
  // scale the alpha weight relative a 30 fps stream.
  double fps = GetFrameRate();
  if (fps > 0.0) {
    double rate_scale = 30.0 / fps;
    // At startup, there can be a lot of noise in the fps estimate.
    // Interpolate rate_scale linearly, from 1.0 at sample #1, to 30.0 / fps
    // at sample #kStartupDelaySamples.
    if (_alphaCount < kStartupDelaySamples) {
      rate_scale =
          (_alphaCount * rate_scale + (kStartupDelaySamples - _alphaCount)) /
          kStartupDelaySamples;//kStartupDelaySamples = 30
    }
    //alpha = pow(399/400,30.0 / fps)  
    alpha = pow(alpha, rate_scale);
  }

  double avgNoise = alpha * _avgNoise + (1 - alpha) * d_dT;
  double varNoise =
      alpha * _varNoise + (1 - alpha) * (d_dT - _avgNoise) * (d_dT - _avgNoise);
  if (!incompleteFrame || varNoise > _varNoise) {
    _avgNoise = avgNoise;
    _varNoise = varNoise;
  }
  if (_varNoise < 1.0) {
    // The variance should never be zero, since we might get stuck and consider
    // all samples as outliers.
    _varNoise = 1.0;
  }
}
double avgNoise = alpha * _avgNoise + (1 - alpha) * d_dT;
double varNoise =
      alpha * _varNoise + (1 - alpha) * (d_dT - _avgNoise) * (d_dT - _avgNoise);

4.4)Kalman Filter 预测及校正

  double _thetaCov[2][2];  // Estimate covariance 先验估计误差协方差
  double _Qcov[2][2];      // Process noise covariance 过程噪声协方差对应Q向量
/**
@ frameDelayMS:为测量出来的当前帧和上一帧的帧间抖动延迟
@ deltaFSBytes:当前帧的数据量和上一帧的数据量之差
**/
void VCMJitterEstimator::KalmanEstimateChannel(int64_t frameDelayMS,
                                               int32_t deltaFSBytes) {
  
  double Mh[2];//P(i) = E[1/c(i) m(i)]
  double hMh_sigma;
  double kalmanGain[2];
  double measureRes;
  double t00, t01;
  // Kalman filtering
  /*1)计算先验估计误差协方差  
     结合公式3.4.1~3.4.4  
    e^-(i) = theta(i) - theta_hat^-(i)                          
    P^-(i) = {e^-(i) * e^-(i)^T}
           = E{(theta(i) - theta_hat^-(i)) * (theta(i) - theta_hat^-(i))^T}     
           = A * P(i-1) * A^T + Q                                               
           = P(i-1) + Q 
           = E(i-1) + Q   
   当前帧(i)的先验估计误差协防差 = 上一帧(i-1)的误差协方差 + 过程噪声协方差        
  */     
    
  //Prediction
  //M = M + Q = E(i-1) + Q     
  _thetaCov[0][0] += _Qcov[0][0];
  _thetaCov[0][1] += _Qcov[0][1];
  _thetaCov[1][0] += _Qcov[1][0];
  _thetaCov[1][1] += _Qcov[1][1];

  /*  
     2) 校正 根据公式3.5.1~3.5.2计算卡尔曼增益
    
                       P^-(i) * H^T
     k_bar(i) = ------------------------------------------------------
                    H * P^-(i) * H^T + R                   
              
                       P^-(i) * h_bar(i)
              = ------------------------------------------------------              (3.5.1)
                    h_bar(i)^T * P^-(i) * h_bar(i) + R                   
                     
                     ( E(i-1) + Q(i) ) * h_bar(i)
              = ------------------------------------------------------              (3.5.2)
                var_v_hat(i) + h_bar(i)^T * (E(i-1) + Q(i)) * h_bar(i)    
  
  */
  // Kalman gain
  // K = M*h'/(sigma2n + h*M*h') = M*h'/(1 + h*M*h') = M*h'/(var_v_hat(i) + h*M*h')
  // h = [dFS 1] 其中dFS对应的入参deltaFSBytes
  // Mh = M*h' = _thetaCov[2][2] * [dFS 1]^
  // hMh_sigma = h*M*h' + R = h_bar(i)^T * (E(i-1) + Q(i)) * h_bar(i) + R   
  Mh[0] = _thetaCov[0][0] * deltaFSBytes + _thetaCov[0][1];// 对应1/C(i) 信道传输速率的误差协方差
  Mh[1] = _thetaCov[1][0] * deltaFSBytes + _thetaCov[1][1];// 对应网络排队延迟m(i)的误差协方差
  // sigma weights measurements with a small deltaFS as noisy and
  // measurements with large deltaFS as good
  if (_maxFrameSize < 1.0) {
    return;
  }
  //sigma为测量噪声标准差的指数平均滤波,对应的是测量噪声的协方差R
  double sigma = (300.0 * exp(-fabs(static_cast<double>(deltaFSBytes)) /
                              (1e0 * _maxFrameSize)) +
                  1) *
                 sqrt(_varNoise);
  if (sigma < 1.0) {
    sigma = 1.0;
  }
  // hMh_sigma 对应H * P^-(i) * H^T = h_bar(i)^T * (E(i-1) + Q(i)) * h_bar(i) + R 
  // 对应公式(3.5.1)  
  //[dFS 1]^ * Mh  =  dFS * Mh[0] + Mh[1]  
  hMh_sigma = deltaFSBytes * Mh[0] + Mh[1] + sigma;
  if ((hMh_sigma < 1e-9 && hMh_sigma >= 0) ||
      (hMh_sigma > -1e-9 && hMh_sigma <= 0)) {
    assert(false);
    return;
  }
  //计算卡尔曼增益Mh / hMh_sigma  
  kalmanGain[0] = Mh[0] / hMh_sigma;
  kalmanGain[1] = Mh[1] / hMh_sigma;

  /*
  3)根据公式3.6.1~3.6.4校正计算后验估计值
    theta_hat(i) = theta_hat^-(i) + k_bar(i) * (d(i) - H * theta_hat^-(i))
                 = theta_hat(i-1) + k_bar(i) * (d(i) - H * theta_hat(i-1))              (3.6.1)
                 = theta_hat(i-1) + k_bar(i) * d(i) - k_bar(i) * H * theta_hat(i-1)     
                 = (1 - k_bar(i) * H) * theta_hat(i-1)  + k_bar(i) * d(i)               (3.6.2)
    其中k_bar(i) ~ [0 ~ 1/H]  
    z(i) = d(i) - h_bar(i)^T * theta_hat(i-1)                                           (3.6.3)
    theta_hat(i) = theta_hat(i-1) + z(i) * k_bar(i)                                     (3.6.4)  
  */  
  // Correction
  // theta = theta + K*(dT - h*theta)
  // 计算网络残差,得到最优估计值  
  measureRes = frameDelayMS - (deltaFSBytes * _theta[0] + _theta[1]);
  _theta[0] += kalmanGain[0] * measureRes; //公式(3.6.4)  
  _theta[1] += kalmanGain[1] * measureRes; //公式(3.6.4)  

  if (_theta[0] < _thetaLow) {
    _theta[0] = _thetaLow;
  }
  /**
  4)根据公式3.7.1~3.7.4更新误差协方差,为下一次预测提供最优滤波器系数
    e(i) = theta(i) - theta_hat(i)                      P(E(i)) ~ (0 , P)               (3.7.1)
    P(i) = E{e(i) * e(i)^T}                                                             (3.7.2)
         = E{(theta(i) - theta_hat(i)) * (theta(i) - theta_hat(i))^T}                   (3.7.3)
         = (I - k_bar(i) * H) * P^-(i)                                                  (3.7.4)
         = (I - k_bar(i) * h_bar(i)^T) * (E(i-1) + Q(i))                                (3.7.5)
         = E(i)  
  */
  // M = (I - K*h)*M
  t00 = _thetaCov[0][0];
  t01 = _thetaCov[0][1];
  _thetaCov[0][0] = (1 - kalmanGain[0] * deltaFSBytes) * t00 -
                    kalmanGain[0] * _thetaCov[1][0];
  _thetaCov[0][1] = (1 - kalmanGain[0] * deltaFSBytes) * t01 -
                    kalmanGain[0] * _thetaCov[1][1];
  _thetaCov[1][0] = _thetaCov[1][0] * (1 - kalmanGain[1]) -
                    kalmanGain[1] * deltaFSBytes * t00;
  _thetaCov[1][1] = _thetaCov[1][1] * (1 - kalmanGain[1]) -
                    kalmanGain[1] * deltaFSBytes * t01;

  // Covariance matrix, must be positive semi-definite.
  assert(_thetaCov[0][0] + _thetaCov[1][1] >= 0 &&
         _thetaCov[0][0] * _thetaCov[1][1] -
                 _thetaCov[0][1] * _thetaCov[1][0] >=
             0 &&
         _thetaCov[0][0] >= 0);
}
double noiseThreshold = _noiseStdDevs * sqrt(_varNoise) - _noiseStdDevOffset;
jitterDelay = _theta[0] * (_maxFrameSize - _avgFrameSize) + NoiseThreshold();

4.5)webrtc jitterdelay相关数据测试

总结:

参考文献

[1].A Google Congestion Control Algorithm for Real-Time Communication draft-alvestrand-rmcat-congestion-03
[2].Analysis and Design of the Google Congestion Control for Web Real-time Communication (WebRTC)
[3].DR_CAN 卡尔曼滤波视频教程
[4].从放弃到精通!卡尔曼滤波从理论到实践

上一篇 下一篇

猜你喜欢

热点阅读