[tf]tf.estimate

2019-01-16 本文已影响26人 VanJordan

学习流程：Estimator 封装了对机器学习不同阶段的控制，用户无需不断的为新机器学习任务重复编写训练、评估、预测的代码。可以专注于对网络结构的控制。
网络结构：Estimator 的网络结构是在 model_fn 中独立定义的，用户创建的任何网络结构都可以在 Estimator 的控制下进行机器学习。这可以允许用户很方便的使用别人定义好的 model_fn。
数据导入：Estimator 的数据导入也是由 input_fn 独立定义的。例如，用户可以非常方便的只通过改变 input_fn 的定义，来使用相同的网络结构学习不同的数据。
model_fn模型函数必须要有features, mode两个参数，可自己选择加入labels（可以把label也放进features中）。最后要返回特定的tf.estimator.EstimatorSpec()。模型有三个阶段都共用的正向传播部分，和由mode值来控制返回不同tf.estimator.EstimatorSpec的三个分支。
预测分支，将想要预测的值写进字典里面

# 创建predictions字典，里面写进所有你想要在预测值输出的数值
    # 隐藏层的数值也可以，这里演示了输出所有隐藏层层结果。
    # 字典的key是模型，value给的是对应的tensor
    predictions = {
        "image":features['image'],
        "conv1_out":conv1,
        "pool1_out":pool1,
        "conv2_out":conv2,
        "pool2_out":pool2,
        "pool2_flat_out":pool2_flat,
        "dense1_out":dense1,
        "logits":logits,
        "classes": tf.argmax(input=logits, axis=1),
        "labels": features['label'],
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }
    # 当mode为tf.estimator.ModeKeys.PREDICT时，我们就让模型返回预测的操作
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

训练分支需要loss和train_op操作符。

# 训练和评估时都会用到loss
    loss = tf.losses.sparse_softmax_cross_entropy(labels=features['label'], logits=logits)
    # 训练分支
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
        train_op = optimizer.minimize(
        loss=loss,
        # global_step用于记录训练了多少步
        global_step=tf.train.get_global_step())
        # 返回的tf.estimator.EstimatorSpec根据
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

评估分支

# 注意评估的时候，模型和训练时一样，是一个循环的loop，不断累积计算评估指标。
    # 其中有两个局部变量total和count来控制
    # 把网络中的某个tensor结果直接作为字典的value是不好用的
    # loss的值是始终做记录的，eval_metric_ops中是额外想要知道的评估指标
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=features['label'], predictions=predictions["classes"])}
    # 不好用：eval_metric_ops = {"probabilities": predictions["probabilities"]}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

创建一个实例

训练后的模型参数会保存在model_dir中，随着训练在目录下生成拥有类似下面内容的checkpoint文件。

# model_dir 表示模型要存到哪里
mnist_classifier = tf.estimator.Estimator(
    model_fn=model_fn, model_dir="mnist_model_cnn")

在训练或评估中利用Hook打印中间信息。

hooks：如果不送值，则训练过程中不会显示字典中的数值。
steps：指定了训练多少次，如果不送值，则训练到dataset API遍历完数据集为止。
max_steps：指定了最大训练次数。

# 在训练或评估的循环中，每50次print出一次字典中的数值
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)
mnist_classifier.train(input_fn=train_input_fn, hooks=[logging_hook])

评估

# 训练集
eval_results = mnist_classifier.evaluate(input_fn=train_eval_fn, checkpoint_path=None)
print('train set')
print(eval_results)
# 测试集
# checkpoint_path是可以指定选择那个时刻保存的权重进行评估
eval_results = mnist_classifier.evaluate(input_fn=test_input_fn, checkpoint_path=None)
print('test set')
print(eval_results)

评估

predicts =list(mnist_classifier.predict(input_fn=test_input_fn))

predicts[0].keys()
# 输出为：
dict_keys(['image', 'conv1_out', 'pool1_out', 'conv2_out', 'pool2_out', 'pool2_flat_out', 'dense1_out', 'logits', 'classes', 'labels', 'probabilities'])

如果想要输出第四个卷积

plt.figure(num=4,figsize=(28,28))
for i in range(4):
    plt.subplot(1,4,i+1)
    plt.imshow(predicts[0]['conv1_out'][:,:,i],cmap = plt.cm.gray)
plt.savefig('conv1_out.png')

一个范例

def model_fn(features, labels, mode, params):
    if isinstance(features, dict):  # For serving
        features = features['feature']

    predictions = tf.layers.dense(features, 1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        loss = tf.nn.l2_loss(predictions - labels)
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer(learning_rate=0.5).minimize(
                loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
        else:
            raise NotImplementedError()

early stopping

函数原型

tf.contrib.estimator.stop_if_no_increase_hook(
    estimator,
    metric_name,
    max_steps_without_increase,
    eval_dir=None,
    min_steps=0,
    run_every_secs=60,
    run_every_steps=None
)

metric_name: str类型，比如loss或者accuracy.
max_steps_without_increase: int，如果没有增加的最大长是多少，如果超过了这个最大步长metric还是没有增加那么就会停止。
eval_dir：默认是使用estimator.eval_dir目录，用于存放评估的summary file。
min_steps：训练的最小步长，如果训练小于这个步长那么永远都不会停止。
run_every_secs和run_every_steps：表示多长时间获得步长调用一次should_stop_fn。

import tf_metrics

# Metrics
weights = tf.sequence_mask(nwords)
metrics = {
    'acc': tf.metrics.accuracy(tags, pred_ids, weights),
    'precision': tf_metrics.precision(tags, pred_ids, num_tags, indices, weights),
    'recall': tf_metrics.recall(tags, pred_ids, num_tags, indices, weights),
    'f1': tf_metrics.f1(tags, pred_ids, num_tags, indices, weights),
}
# Tensoboard summaries
for metric_name, op in metrics.items():
    tf.summary.scalar(metric_name, op[1])
# 1. Define our input_fn
train_inpf = functools.partial(input_fn, 'words.train.txt', 'tags.train.txt',
                               params, shuffle_and_repeat=True)
eval_inpf = functools.partial(input_fn,'words.testa.txt', 'tags.testa.txt'
                              params)

# 2. Create a hook
Path(estimator.eval_dir()).mkdir(parents=True, exist_ok=True)
hook = tf.contrib.estimator.stop_if_no_increase_hook(
    estimator, 'f1', 500, min_steps=8000, run_every_secs=120)
train_spec = tf.estimator.TrainSpec(input_fn=input_fn, hooks=[hook])
eval_spec = tf.estimator.EvalSpec(input_fn=eval_inpf, throttle_secs=120)

# 3. Train with early stopping
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

第二个范例

model_fn的定义有固定的标准，包括其对应的输入参数和返回参数。
model_fn定义了模型的网络结果，损失护士，优化函数以及评估参数等一系列的指标，可以根据自身需求进行相应的删减。

def model_fn(features, labels, mode, params):
    # Args:
    #
    # features: This is the x-arg from the input_fn.
    # labels:   This is the y-arg from the input_fn,
    #           see e.g. train_input_fn for these two.
    # mode:     Either TRAIN, EVAL, or PREDICT
    # params:   User-defined hyper-parameters, e.g. learning-rate.

    # Reference to the tensor named "x" in the input-function.
#     x = features["images"]
    x = tf.feature_column.input_layer(features, params['feature_columns'])
    # The convolutional layers expect 4-rank tensors
    # but x is a 2-rank tensor, so reshape it.
    net = tf.reshape(x, [-1, IMG_HEIGHT, IMG_WIDTH, NUM_CHANNEL])    

    # First convolutional layer.
    net = tf.layers.conv2d(inputs=net, name='layer_conv1',
                           filters=16, kernel_size=5,
                           padding='same', activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)

    # Second convolutional layer.
    net = tf.layers.conv2d(inputs=net, name='layer_conv2',
                           filters=36, kernel_size=5,
                           padding='same', activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)    

    # Flatten to a 2-rank tensor.
    net = tf.contrib.layers.flatten(net)
    # Eventually this should be replaced with:
    # net = tf.layers.flatten(net)

    # First fully-connected / dense layer.
    # This uses the ReLU activation function.
    net = tf.layers.dense(inputs=net, name='layer_fc1',
                          units=128, activation=tf.nn.relu)    

    # Second fully-connected / dense layer.
    # This is the last layer so it does not use an activation function.
    net = tf.layers.dense(inputs=net, name='layer_fc2',
                          units=10)

    # Logits output of the neural network.
    logits = net

    # Softmax output of the neural network.
    y_pred = tf.nn.softmax(logits=logits)

    # Classification output of the neural network.
    y_pred_cls = tf.argmax(y_pred, axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # If the estimator is supposed to be in prediction-mode
        # then use the predicted class-number that is output by
        # the neural network. Optimization etc. is not needed.
        spec = tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=y_pred_cls)
    else:
        # Otherwise the estimator is supposed to be in either
        # training or evaluation-mode. Note that the loss-function
        # is also required in Evaluation mode.

        # Define the loss-function to be optimized, by first
        # calculating the cross-entropy between the output of
        # the neural network and the true labels for the input data.
        # This gives the cross-entropy for each image in the batch.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                                       logits=logits)

        # Reduce the cross-entropy batch-tensor to a single number
        # which can be used in optimization of the neural network.
        loss = tf.reduce_mean(cross_entropy)

        # Define the optimizer for improving the neural network.
        optimizer = tf.train.AdamOptimizer(learning_rate=params["learning_rate"])

        # Get the TensorFlow op for doing a single optimization step.
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())

        # Define the evaluation metrics,
        # in this case the classification accuracy.
        metrics = \
        {
            "accuracy": tf.metrics.accuracy(labels, y_pred_cls)
        }

        # Wrap all of this in an EstimatorSpec.
        spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=metrics)

    return spec

实例化这个Estimaotr

params：是传入数据字典。
model_dir：是模型和log的保存路径。

params = {"learning_rate": 1e-4,
         'feature_columns': feature_columns}

model = tf.estimator.Estimator(model_fn=model_fn,
                               params=params,
                               model_dir="./cnn_classifer_dataset/")

接下来我们只需要简单调用train，evaluate，predict方法，就可以实现对模型的训练验证和测试了。

训练：

input_fn = lambda: csv_input_fn(\
                                files_name_pattern= TRAIN_DATA_FILES_PATTERN,mode=tf.estimator.ModeKeys.TRAIN)
# Train the Model
model.train(input_fn, steps=2000)

验证

input_fn = lambda: csv_input_fn(files_name_pattern= VAL_DATA_FILES_PATTERN,mode=tf.estimator.ModeKeys.EVAL)

model.evaluate(input_fn,steps=1)

测试

import itertools

input_fn = lambda: csv_input_fn(\
                                files_name_pattern= TEST_DATA_FILES_PATTERN,mode=tf.estimator.ModeKeys.PREDICT,batch_size=10)

predictions = list(itertools.islice(model.predict(input_fn=input_fn),10))
print('PREDICTIONS',predictions)

范例三

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
  
import sys
import argparse
import math
  
import tensorflow as tf
 
FLAGS = None
batch_size = 100
 
def _my_input_fn(filepath, num_epochs):
  # image - 784 (=28 x 28) elements of grey-scaled integer value [0, 1]
  # label - digit (0, 1, ..., 9)
  data_queue = tf.train.string_input_producer(
    [filepath],
    num_epochs = num_epochs) # data is repeated and it raises OutOfRange when data is over
  data_reader = tf.TFRecordReader()
  _, serialized_exam = data_reader.read(data_queue)
  data_exam = tf.parse_single_example(
    serialized_exam,
    features={
      'image_raw': tf.FixedLenFeature([], tf.string),
      'label': tf.FixedLenFeature([], tf.int64)
    })
  data_image = tf.decode_raw(data_exam['image_raw'], tf.uint8)
  data_image.set_shape([784])
  data_image = tf.cast(data_image, tf.float32) * (1. / 255)
  data_label = tf.cast(data_exam['label'], tf.int32)
  data_batch_image, data_batch_label = tf.train.batch(
    [data_image, data_label],
    batch_size=batch_size)
  return data_batch_image, data_batch_label
 
def _get_input_fn(filepath, num_epochs):
  return lambda: _my_input_fn(filepath, num_epochs)
 
def _my_model_fn(features, labels, mode):
  # with tf.device(...): # You can set device if using GPUs
   
  # define network and inference
  # (simple 2 fully connected hidden layer : 784->128->64->10)
  with tf.name_scope('hidden1'):
    weights = tf.Variable(
      tf.truncated_normal(
        [784, 128],
        stddev=1.0 / math.sqrt(float(784))),
      name='weights')
    biases = tf.Variable(
      tf.zeros([128]),
      name='biases')
    hidden1 = tf.nn.relu(tf.matmul(features, weights) + biases)
  with tf.name_scope('hidden2'):
    weights = tf.Variable(
      tf.truncated_normal(
        [128, 64],
        stddev=1.0 / math.sqrt(float(128))),
      name='weights')
    biases = tf.Variable(
      tf.zeros([64]),
      name='biases')
    hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
  with tf.name_scope('softmax_linear'):
    weights = tf.Variable(
      tf.truncated_normal(
        [64, 10],
        stddev=1.0 / math.sqrt(float(64))),
    name='weights')
    biases = tf.Variable(
      tf.zeros([10]),
      name='biases')
    logits = tf.matmul(hidden2, weights) + biases
 
  # compute evaluation matrix
  predicted_indices = tf.argmax(input=logits, axis=1)
  if mode != tf.estimator.ModeKeys.PREDICT:
    label_indices = tf.cast(labels, tf.int32) 
    accuracy = tf.metrics.accuracy(label_indices, predicted_indices)
    tf.summary.scalar('accuracy', accuracy[1]) # output to TensorBoard
 
  # compute loss
  loss = tf.losses.sparse_softmax_cross_entropy(
    labels=labels,
    logits=logits)
 
  # define operations
  if mode == tf.estimator.ModeKeys.TRAIN:
    #global_step = tf.train.create_global_step()
    #global_step = tf.contrib.framework.get_or_create_global_step()
    global_step = tf.train.get_or_create_global_step()    
    optimizer = tf.train.GradientDescentOptimizer(
      learning_rate=0.07)
    train_op = optimizer.minimize(
      loss=loss,
      global_step=global_step)
    return tf.estimator.EstimatorSpec(
      mode,
      loss=loss,
      train_op=train_op)
  if mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
      'accuracy': accuracy
    }
    return tf.estimator.EstimatorSpec(
      mode,
      loss=loss,
      eval_metric_ops=eval_metric_ops)
  if mode == tf.estimator.ModeKeys.PREDICT:
    probabilities = tf.nn.softmax(logits, name='softmax_tensor')
    predictions = {
      'classes': predicted_indices,
      'probabilities': probabilities
    }
    export_outputs = {
      'prediction': tf.estimator.export.PredictOutput(predictions)
    }
    return tf.estimator.EstimatorSpec(
      mode,
      predictions=predictions,
      export_outputs=export_outputs)
 
def main(_):
  # read TF_CONFIG
  run_config = tf.contrib.learn.RunConfig()
 
  # define
  mnist_fullyconnected_classifier = tf.estimator.Estimator(
    model_fn=_my_model_fn,
    model_dir=FLAGS.out_dir,
    config=run_config)
  train_spec = tf.estimator.TrainSpec(
    input_fn=_get_input_fn(FLAGS.train_file, 2),
    max_steps=60000 * 2 / batch_size)
  eval_spec = tf.estimator.EvalSpec(
    input_fn=_get_input_fn(FLAGS.test_file, 1),
    steps=10000 * 1 / batch_size,
    start_delay_secs=0)
     
  # run !
  tf.estimator.train_and_evaluate(
    mnist_fullyconnected_classifier,
    train_spec,
    eval_spec
  )
              
if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
    '--train_file',
    type=str,
    default='/home/demouser/train.tfrecords',
    help='File path for the training data.')
  parser.add_argument(
    '--test_file',
    type=str,
    default='/home/demouser/test.tfrecords',
    help='File path for the test data.')
  parser.add_argument(
    '--out_dir',
    type=str,
    default='/home/demouser/out',
    help='Dir path for the model and checkpoint output.')
  FLAGS, unparsed = parser.parse_known_args()
  
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

可以配合不同的hook使用

You cannot see the train loop (while clause for mini-batch) in this code, but you can use custom tf.train.SessionRunHook or tf.train.LoggingTensorHook for your custom code (ex : debug printing, etc) in the train loop.

[tf]tf.estimate

一个范例

early stopping

第二个范例

范例三

猜你喜欢

热点阅读