## cs231n — neural-networks-2

2018-04-19  作者：db24cc

agenda

数据预处理

 class MaxNorm(Constraint):
  """MaxNorm weight constraint.

  Constrains the weights incident to each hidden unit to have an L2
  norm less than or equal to a desired value.

  # Arguments
      max_value: the maximum norm allowed for the incoming weights.
      axis: integer (or list of integers), axis along which to compute
          weight norms. For instance, in a `Dense` layer the weight
          matrix has shape `(input_dim, output_dim)`; set `axis` to `0`
          to constrain each weight vector of length `(input_dim,)`.
          In a `Conv2D` layer with `data_format="channels_last"`, the
          weight tensor has shape
          `(rows, cols, input_depth, output_depth)`; set `axis` to
          `[0, 1, 2]` to constrain the weights of each filter tensor
          of size `(rows, cols, input_depth)`.

  # References
      - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
  """

  def __init__(self, max_value=2, axis=0):
      # Upper bound on the per-unit weight norm.
      self.max_value = max_value
      # Axis (or axes) over which the norm is computed.
      self.axis = axis

  def __call__(self, w):
      # Per-unit L2 norm of the incoming weights along the configured axis.
      norm = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True))
      # Clip norms into [0, max_value]: units already inside the bound
      # get a rescale ratio of 1 and are left unchanged.
      target = K.clip(norm, 0, self.max_value)
      # K.epsilon() (a tiny fuzz factor, ~1e-7) guards against a
      # division by zero when a unit's weights are all zero.
      w *= target / (K.epsilon() + norm)
      return w

  def get_config(self):
      # Serializable constructor arguments (used for model saving/loading).
      return {'max_value': self.max_value,
              'axis': self.axis}

还是从 Caffe 的实际实现来看一下 Dropout 是如何运作的：

// Forward pass of the dropout layer (CPU path).
// NOTE(review): the `template <typename Dtype>` header line was lost in
// extraction; this is the member definition of a class template.
void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = top[0]->mutable_cpu_data();
    // Per-element keep/drop mask, stored so Backward can reuse it.
    unsigned int* mask = rand_vec_.mutable_cpu_data();
    const int count = bottom[0]->count();
    // Dropout is only active during training; at test time the input is
    // copied through (optionally rescaled, see the else branch).
    if (this->phase_ == TRAIN) {
      // Fill `mask` with Bernoulli(1 - threshold_) draws: 1 keeps the
      // corresponding activation, 0 drops it.
      caffe_rng_bernoulli(count, 1. - threshold_, mask);
      // scale_train_ comes from the layer prototxt and selects whether
      // the rescaling happens at train time or at test time.
      if (scale_train_) {
        // Rescale kept activations by scale_ at train time (presumably
        // scale_ = 1 / (1 - threshold_), i.e. "inverted dropout" — the
        // value of scale_ is set outside this function; verify there),
        // so the test-time forward needs no adjustment.
        for (int i = 0; i < count; ++i) {
          top_data[i] = bottom_data[i] * mask[i] * scale_;
        }
      } else {
        for (int i = 0; i < count; ++i) {
          // Apply the mask only; compensation is applied at test time.
          top_data[i] = bottom_data[i] * mask[i];
        }
      }
    } else {
      // Test phase: pass the input through unchanged...
      caffe_copy(bottom[0]->count(), bottom_data, top_data);
      if (!scale_train_) {
        // ...except when training did NOT rescale: divide by scale_ here
        // so train- and test-time expected activations match.
        caffe_scal<Dtype>(  count, 1. / scale_, top_data);
      }
    }
  }
上一篇 下一篇

猜你喜欢

热点阅读