Question

我用 TensorFlow 实现了一个 LSTM，用来为特征向量的每个时间步预测一个标签（-1 到 1 之间的浮点数），如下例所示：

import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.contrib.learn.python.learn.datasets import base
import random
import numpy as np
import fx
import load_data as ld
import matplotlib.pyplot as plt
import random

# Training hyper-parameters
LEARNING_RATE = 1e-3
TRAINING_ITERS = 20_000
BATCH_SIZE = 128
DISPLAY_STEP = 10

# Network / dataset dimensions
N_TIMESTEPS = 1260  # timesteps in each observation
N_OBSERVATIONS = 2000  # observations in the generated data set
N_HIDDEN = 32  # features in the hidden layer

# Fraction of the data assigned to each split
DATA_SPLITTING = dict(train=0.6, validation=0.2, test=0.2)

# Generate a bunch of data points and then package them up in the array
# format needed by tensorflow
def generate_data(n_observations, Fs, n_timesteps, impose_slow_sine):
    """Create `n_observations` synthetic signals together with their labels.

    Each signal comes from `generate_sinusoid(Fs, n_timesteps,
    impose_slow_sine)` and is labelled with `label_data`.

    Returns:
        A `(features, labels)` tuple of NumPy arrays. Both carry a leading
        singleton axis, i.e. presumably (1, n_observations, n_timesteps)
        when `generate_sinusoid` yields 1-D arrays of length `n_timesteps`
        (TODO confirm against that helper).
    """
    signals = []
    targets = []
    for _ in range(n_observations):
        signal = generate_sinusoid(Fs, n_timesteps, impose_slow_sine)
        signals.append(signal)
        targets.append(label_data(signal))
        # To eyeball the labels while debugging: plot_labels(signal, targets[-1])

    # Stack the per-observation rows, then prepend a singleton axis so the
    # synthetic data has the same number of dimensions as the real data.
    features = np.array(signals)[np.newaxis, ...]
    labels = np.array(targets)[np.newaxis, ...]
    return features, labels

def label_data (x):
    """Rescale `x` linearly so its minimum maps to -1 and its maximum to 1.

    Args:
        x: Non-empty array-like of numbers.

    Returns:
        A NumPy array with the same shape as `x` and values in [-1, 1].
        If every element of `x` is equal the range is zero and the rescaling
        is undefined; an all-zero float array is returned in that case
        instead of NaNs (which would poison a downstream loss).

    Raises:
        ValueError: If `x` is empty (raised by `np.amax`).
    """
    values = np.asarray(x)
    # Local names deliberately avoid shadowing the builtins `max` / `min`.
    highest = np.amax(values)
    lowest = np.amin(values)
    span = highest - lowest
    if span == 0:
        # Constant signal: 0/0 would give NaN, so use the midpoint of [-1, 1].
        return np.zeros(values.shape, dtype=float)
    return 2 * (values - highest) / span + 1


def main ():
    """Build, train and evaluate the per-timestep LSTM regressor.

    Generates synthetic data, splits it into train/validation/test sets,
    builds a TensorFlow graph (a static LSTM followed by a linear layer
    applied to every timestep), trains it with plain gradient descent on a
    mean-squared-error cost, prints the minibatch metrics every
    DISPLAY_STEP steps and finally prints the metric on the test set.
    """
    # Generate the data
    # NOTE(review): N_TIMESTEPS is passed for both `Fs` and `n_timesteps` --
    # confirm that this is intended.
    features, labels = generate_data (N_OBSERVATIONS, N_TIMESTEPS, N_TIMESTEPS, True)
    # Split data into train, validation, and test sets
    data_split = fx.split_data (features, labels, DATA_SPLITTING)
    # Create objects that are iterable over batches
    train = fx.DataSet (data_split['train_features'], data_split['train_labels'])
    validation = fx.DataSet (data_split['validation_features'], data_split['validation_labels'])
    test = fx.DataSet (data_split['test_features'], data_split['test_labels'])
    # Create tf object that contains all the datasets
    # NOTE(review): the validation set is stored here but never read again in
    # this function; it is not used during training.
    data_sets = base.Datasets (train=train, validation=validation, test=test)

    # Get the dimensions for in the placeholders
    features_dimension = features.shape[0]
    labels_dimension = labels.shape[0]
    n_timesteps = features.shape[2]

    # TF Graph Placeholders
    # Dimension 0 is the number of dimensions in the features and labels;
    # dimension 1 is the number of observations;
    # dimension 2 is the number of timesteps.
    x = tf.placeholder ("float", [features_dimension, None, n_timesteps])
    y = tf.placeholder ("float", [labels_dimension, None, n_timesteps])

    # Define weights
    # Output layer mapping the N_HIDDEN LSTM features to labels_dimension
    # values; both the weights and the biases start at zero.
    weights = {'out': tf.Variable (tf.zeros ([N_HIDDEN, labels_dimension]))}
    biases = {'out': tf.Variable (tf.zeros ([labels_dimension]))}

    def RNN (x, weights, biases):
        """Return per-timestep predictions shaped like the placeholder `y`.

        Unrolls an LSTM over the timestep axis of `x` and applies the linear
        layer `weights['out']` / `biases['out']` to every timestep's output.
        """
        # Prepare data shape to match `rnn` function requirements
        # Current data input shape: (features_dimension, n_observations, n_timesteps)
        # Permuting features_dimension and n_timesteps
        x = tf.transpose (x, [2, 1, 0])
        # Reshaping to (n_observations*n_timesteps, features_dimension) (we are removing the depth dimension with this)
        x = tf.reshape(x, [-1, features_dimension])
        # Split the previous 2D tensor to get a list of `n_timesteps` tensors of
        # shape (n_observations, features_dimension).
        x = tf.split (x, n_timesteps, 0)
        # Define a lstm cell with tensorflow
        lstm_cell = rnn.LSTMCell (N_HIDDEN, use_peepholes=True)
        # Get lstm cell output
        # outputs is a list of `n_timesteps` tensors with shape (n_observations, N_HIDDEN)
        outputs, states = rnn.static_rnn (lstm_cell, x, dtype=tf.float32)
        # Transform the list into a 3D tensor with dimensions (n_timesteps, n_observations, N_HIDDEN)
        outputs = tf.stack(outputs)
        # Linear activation
        def pred_fn(current_output):
            """Apply the linear output layer to one timestep's LSTM output."""
            return tf.matmul(current_output, weights['out']) + biases['out']
        # Use tf.map_fn to apply pred_fn to each tensor in outputs, along dimension 0 (timestep dimension)
        pred = tf.map_fn(pred_fn, outputs)

        # Return pred with the same dimensions as the placeholder y
        # Current shape: (n_timesteps, n_observations, labels_dimension)
        # Required shape: (labels_dimension, n_observations, n_timesteps)
        # Permute n_timesteps and labels_dimension
        return tf.transpose(pred, [2, 1, 0])

    # Results from the RNN
    pred = RNN (x, weights, biases)
    # Mean squared error between predictions and labels
    cost = tf.reduce_mean (tf.square (pred - y))
    optimizer = tf.train.GradientDescentOptimizer (LEARNING_RATE).minimize (cost)
    # Evaluate model
    # NOTE(review): this is the same mean squared error as `cost`, so the
    # value printed as "Accuracy" is an error measure (lower is better).
    accuracy = tf.reduce_mean (tf.cast (tf.square (pred - y), "float"))
    # Initializing the variables
    init = tf.global_variables_initializer ()
    # Launch the graph
    with tf.Session () as sess:
        sess.run (init)
        step = 1
        # Keep training until reach max iterations
        # (the loop ends once step * BATCH_SIZE reaches TRAINING_ITERS)
        while step * BATCH_SIZE < TRAINING_ITERS:
            batch_x, batch_y = data_sets.train.next_batch(BATCH_SIZE)
            # Run optimization op (backprop)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            if step % DISPLAY_STEP == 0:
                # Calculate batch accuracy
                acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
                # Calculate batch loss
                loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
                print("Iter " + str(step*BATCH_SIZE) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))
            step += 1

        print ("Optimization Finished!")

        # Calculate accuracy for the test data
        # The whole test set is fed in a single run.
        test_features = data_sets.test.features
        test_labels = data_sets.test.labels
        print ("Testing Accuracy:", \
            sess.run (accuracy, feed_dict={x: test_features, y: test_labels}))

# Run the training script only when executed directly, not when imported.
if __name__ == "__main__":
    main()

现在我想在代码中实现提前停止（early stopping），以避免过拟合。最直接的实现方法是什么？TensorFlow 是否已经内置了相关功能？我认为 tf.contrib.learn API 或许可以做到，但不确定该如何把它应用到我的这个例子中。

Answer 1

可以使用下面的 Keras 回调来提前停止训练：当 accuracy 超过 0.8 时终止训练。

import tensorflow as tf
class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that stops training once accuracy exceeds 0.8."""

    def on_epoch_end(self, epoch, logs=None):
        """Request an early stop when the epoch's 'accuracy' is above 0.8.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics reported for the epoch; may be None or lack the
                'accuracy' key, in which case nothing happens.
        """
        # `logs=None` avoids a shared mutable default; a missing metric would
        # otherwise raise TypeError when None is compared with a float.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > 0.8:
            print("\nReached 80% accuracy so cancelling training!")
            self.model.stop_training = True
# Instantiate the callback and pass it to training. `model`, `x_train` and
# `y_train` are not defined in this snippet and must be supplied by the caller.
callbacks = myCallback()

model.fit(x_train, y_train, epochs=10, callbacks=[callbacks])

在Tensorflow上使用LSTM提前停止

1 个答案: