从 7 天更改为 30 天时,时间序列预测不起作用

时间:2021-06-05 12:01:37

标签: python pandas tensorflow machine-learning

我一直在关注如何使用多通道 CNN 进行多步时间序列预测的指南:Guide

我使用了不同的数据并试图预测 30 天而不是 7 天。

出于某种原因,其中一个函数出现“元组超出范围”错误。这不是我运行代码 7 天时发生的事情,这表明我所做的更改是我认为的问题吗?我尝试了几次修复,但没有任何效果。我可以看到,第二次通过方法 forecast() 时,它打印了历史形状 (31,),这让我很困惑,第一次打印的是 (30, 74, 7)。它应该改变形状吗?除非我错过了,否则本教程不会提及或显示这一点。为什么会发生这种情况,我该如何解决?

错误如下: error

我的代码如下:

# split a univariate dataset into train/test sets
def split_dataset(data):
  # split into standard weeks

  print(len(data))

  train, test = data[1:-77], data[-77:-6]#328
  # restructure into windows of weekly/monthly data
  print("Pre-split: ")
  print(train.shape)
  max_chunks = 30
  max_nb_value_1 = len(train)//30 # euclidean division : a = bq+r with r < q, this means that q (max_nb_value here) is the maximum number of element that we can take
  max_nb_value_2 =len(test)//30
  train = array([train[i:i+max_nb_value_1] for i in range(0, max_chunks*max_nb_value_1, max_nb_value_1)])
  test = array([test[i:i+max_nb_value_2] for i in range(0, max_chunks*max_nb_value_2, max_nb_value_2)])
  print("Post-split: ")
  print(train.shape)
  print(test.shape)
  return train, test

# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
    scores = list()
    # calculate an RMSE score for each day
    for i in range(actual.shape[1]):
        # calculate mse
        mse = mean_squared_error(actual[:, i], predicted[:, i])
        # calculate rmse
        rmse = sqrt(mse)
        # store
        scores.append(rmse)
    # calculate overall RMSE
    s = 0
    for row in range(actual.shape[0]):
        for col in range(actual.shape[1]):
            s += (actual[row, col] - predicted[row, col])**2
    score = sqrt(s / (actual.shape[0] * actual.shape[1]))
    return score, scores

# summarize scores
def summarize_scores(name, score, scores):
    s_scores = ', '.join(['%.1f' % s for s in scores])
    print('%s: [%.3f] %s' % (name, score, s_scores))

# convert history into inputs and outputs
def to_supervised(train, n_input, n_out=30):
    # flatten data
    data = train.reshape((train.shape[0]*train.shape[1], train.shape[2]))
    X, y = list(), list()
    in_start = 0
    # step over the entire history one time step at a time
    for _ in range(len(data)):
        # define the end of the input sequence
        in_end = in_start + n_input
        out_end = in_end + n_out
        # ensure we have enough data for this instance
        if out_end <= len(data):
      #This differs from univariate. Here we make sure to take the whole dataset.
            X.append(data[in_start:in_end, :])
            y.append(data[in_end:out_end, 0])
        # move along one time step
        in_start += 1
    return array(X), array(y)

# train the model
def build_model(train, n_input):
    # prepare data
    train_x, train_y = to_supervised(train, n_input)
    # define parameters
    verbose, epochs, batch_size = 0, 70, 16
    n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
    # define model
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=16, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs))
    model.compile(loss='mse', optimizer='adam')
    # fit network
    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

# Make a forecast.
def forecast(model, history, n_input):
  # flatten data
  data = array(history)
  print("History shape in forecast(): ")
  print(data.shape)
  data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
  # retrieve last observations for input data
  # For multivariate, make sure to use all features.
  input_x = data[-n_input:, :]
  # reshape into [1, n_input, 1]
  # We need to change the shape as well to take all features.
  input_x = input_x.reshape((1, input_x.shape[0], input_x.shape[1]))
  # forecast the next week
  yhat = model.predict(input_x, verbose=0)
  # we only want the vector forecast
  yhat = yhat[0]
  return that

# evaluate a single model
def evaluate_model(train, test, n_input):
  # fit model
  model = build_model(train, n_input)
  # history is a list of weekly data
  history = [x for x in train]
  # walk-forward validation over each week
  predictions = list()
  for i in range(len(test)):
    # predict the week
    yhat_sequence = forecast(model, history, n_input)
    # store the predictions
    predictions.append(yhat_sequence)
    # get real observation and add to history for predicting the next week
    history.append(test[i, :])
  # evaluate predictions days for each week
  predictions = array(predictions)
  score, scores = evaluate_forecasts(test[:, :, 0], predictions)
  return score, scores

# IMPORT & NORMALIZE DATA
URL = 'https://raw.githubusercontent.com/victordahl/dataset/master/MSFT_10yrs.csv'
#URL = 'https://raw.githubusercontent.com/victordahl/dataset/master/MSFT_10yrs_open.csv'
csvfile = pd.read_csv(URL, header=0, infer_datetime_format=True, parse_dates=['Date'], index_col=['Date'])
#print("cvs type", type(csvfile))

#print(csvfile.iloc[:, 1:]) 
min_max_scaler = preprocessing.MinMaxScaler()

# Normalize last five columns (excluding date in first columns)
x_scaled = min_max_scaler.fit_transform(csvfile.iloc[:, 0:])
df = pd.DataFrame(x_scaled)
# Rename all columns
print(df)
df.columns = ["Open","High","Low","Close","Volume"]

# Derived features
short_SMA = 50
long_SMA = 200
df["50SMA"] = df["Open"].rolling(window=short_SMA).mean()
df["200SMA"] = df["Open"].rolling(window=long_SMA).mean()
# Trim head of dataset to remove the NaN days in SMA feature head
df = df.iloc[long_SMA:len(df)]


train, test = split_dataset(df.values)
# evaluate model and get scores
n_input = 30
score, scores = evaluate_model(train, test, n_input)
# summarize scores
summarize_scores('cnn', score, scores)
# plot scores
# Plot
days = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14','15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30']
pyplot.title("$MSFT prediction")
#pyplot.plot(days, scores, marker='o', label='Predicted close')
pyplot.plot(days, scores, marker='o', label='Predicted close')
pyplot.legend()
pyplot.show()

0 个答案:

没有答案
相关问题