[tf]进行梯度裁剪训练的方法以及设置学习率指数衰减
2018-12-12 本文已影响35人
VanJordan
# Step counter passed to apply_gradients below; marked non-trainable so the
# optimizer never treats it as a model weight.
global_step = tf.Variable(0, name="global_step", trainable=False)
optim = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
# compute_gradients returns (gradient, variable) pairs; the gradient may be
# None when there is no gradient for a given variable.
grads_and_vars = optim.compute_gradients(loss)
# Clip every gradient element-wise into [-clip_grad, clip_grad].
# Pairs with a None gradient are skipped: tf.clip_by_value cannot convert
# None to a tensor and would raise while building the graph.
grads_and_vars_clip = [
    (tf.clip_by_value(g, -FLAGS.clip_grad, FLAGS.clip_grad), v)
    for g, v in grads_and_vars
    if g is not None
]
train_op = optim.apply_gradients(grads_and_vars_clip, global_step=global_step)
def get_optimizer(self):
    """Select the optimizer and build the gradient-clipped training op.

    Reads ``self.learning_rate``, ``self.global_step``, ``self.optimizer``,
    ``self.loss`` and ``self.clip``. Sets ``self.lr`` (decayed learning
    rate), ``self.opt`` (the optimizer instance) and ``self.train_op``.

    :return: None
    :raises KeyError: if ``self.optimizer`` is not one of ``"sgd"``,
        ``"adam"``, ``"adgrad"`` or ``"adagrad"``.
    """
    # NOTE(review): the original indentation was lost; all statements are
    # assumed to belong inside the "optimizer" variable scope -- confirm.
    with tf.variable_scope("optimizer"):
        # Staircase exponential decay: the rate is multiplied by 0.99 once
        # every 15000 global steps.
        self.lr = tf.train.exponential_decay(
            self.learning_rate, self.global_step, 15000, 0.99, staircase=True)

        optimizer = self.optimizer
        if optimizer == "sgd":
            self.opt = tf.train.GradientDescentOptimizer(self.lr)
        elif optimizer == "adam":
            self.opt = tf.train.AdamOptimizer(self.lr)
        elif optimizer in ("adgrad", "adagrad"):
            # "adgrad" is the historical spelling used by existing callers;
            # "adagrad" is accepted as an alias.
            self.opt = tf.train.AdagradOptimizer(self.lr)
        else:
            raise KeyError("unsupported optimizer: {!r}".format(optimizer))

        grads_vars = self.opt.compute_gradients(self.loss)
        # Clip each gradient element-wise into [-clip, clip]. Pairs whose
        # gradient is None are skipped, because tf.clip_by_value cannot
        # convert None to a tensor and would raise.
        capped_grads_vars = [
            (tf.clip_by_value(g, -self.clip, self.clip), v)
            for g, v in grads_vars
            if g is not None
        ]
        # Passing global_step makes the train op increment it on each run,
        # which in turn drives the learning-rate decay above.
        self.train_op = self.opt.apply_gradients(
            capped_grads_vars, self.global_step)