Question

我正在练习如何使用来自 Kaggle 的这个数据集在单变量系列上创建 LSTM 模型：https://www.kaggle.com/sumanthvrao/daily-climate-time-series-data

我的问题是无法准确预测温度，而且训练损失波动很大、极不稳定。我尝试了多种方法，包括：

确保时间序列数据是平稳的
更改时间步长
更改超参数
使用堆叠的 LSTM 模型

我真的很好奇我的代码有什么问题，尽管我有一些假设：

我在预处理数据时出错
我错误地引入了平稳性
此数据集需要多变量方法

%tensorflow_version 2.x  # this line is not required unless you are in a notebook
 
import tensorflow as tf
from numpy import array
from numpy import argmax
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten

# preparing independent and dependent features
def prepare_data(timeseries_data, n_features):
    """Split a univariate series into sliding input windows and next-step targets.

    Each sample pairs ``n_features`` consecutive observations with the single
    observation that immediately follows them.

    Args:
        timeseries_data: 1-D sequence of observations (list, NumPy array or
            pandas Series).
        n_features: Number of consecutive observations per input window.
            Despite the name, this is the window length (time steps), not the
            number of variables per step.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` holds one window per row and
        ``y`` holds the value following each window. Both are empty when the
        series has no more than ``n_features`` observations.

    Raises:
        ValueError: If ``n_features`` is smaller than 1.
    """
    if n_features < 1:
        raise ValueError("n_features must be at least 1")
    # Work on a plain array so that every access is positional. On a pandas
    # Series, ``s[i:j]`` slices by position while ``s[j]`` looks up a label,
    # and the two disagree as soon as the index is not 0..n-1 (for example
    # after ``dropna()``).
    values = np.asarray(timeseries_data)
    n_samples = max(len(values) - n_features, 0)
    X = [values[start:start + n_features] for start in range(n_samples)]
    y = [values[start + n_features] for start in range(n_samples)]
    return np.array(X), np.array(y)

# build the most recent input window used for forecasting
def prepare_x_input(timeseries_data, n_features):
    """Return the most recent window of a series as a single-sample batch.

    Args:
        timeseries_data: 1-D sequence of observations (list, NumPy array or
            pandas Series).
        n_features: Number of trailing observations to keep. Despite the
            name, this is the window length (time steps).

    Returns:
        A NumPy array of shape ``(1, n_features)`` holding the last
        ``n_features`` observations, or an empty array when the series is
        shorter than ``n_features``.

    Raises:
        ValueError: If ``n_features`` is smaller than 1.
    """
    if n_features < 1:
        raise ValueError("n_features must be at least 1")
    if len(timeseries_data) < n_features:
        return np.array([])
    # Only the trailing window is needed, so slice it directly instead of
    # building every window in the series and discarding all but the last.
    last_window = np.asarray(timeseries_data[-n_features:])
    return np.array([last_window])

#read data and filter temperature column
# read data and keep only the temperature column
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Weather Parameter/DailyDelhiClimateTrain.csv')
df.head()
temp_df = df.pop('meantemp')
plt.plot(temp_df)

# make the data stationary by differencing the log-temperatures.
# diff() leaves NaN in the first position; if that NaN reaches the first
# training window the MSE loss (and then every weight) becomes NaN, so it is
# dropped here.
sta_temp_df = np.log(temp_df).diff().dropna()
plt.figure(figsize=(15,5))
plt.plot(sta_temp_df)
print(sta_temp_df)

time_step = 7
# hand over a plain array so that windowing is purely positional (after
# dropna() the Series index no longer starts at 0)
x, y = prepare_data(sta_temp_df.to_numpy(), time_step)
n_features = 1
# Keras LSTM layers expect input shaped (samples, time steps, features)
x = x.reshape((x.shape[0], x.shape[1], n_features))

model = Sequential()
model.add(LSTM(10, return_sequences=True, input_shape=(time_step, n_features)))
model.add(LSTM(10))
model.add(Dense(16, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')
model.summary()

result = model.fit(x, y, epochs=800)

n_days = 113
pred_temp_df = list(temp_df)
test = sta_temp_df.copy()
sta_temp_df = list(sta_temp_df)

# recursive forecast: each prediction is appended to the history and becomes
# part of the input window for the next step
for _ in range(n_days):
    x_input = prepare_x_input(sta_temp_df, time_step)
    print(x_input)
    x_input = x_input.reshape((1, time_step, n_features))
    # pass data into model
    yhat = model.predict(x_input, verbose=0)
    print(yhat[0][0])
    sta_temp_df.append(float(yhat[0][0]))

# undo the transform: start from the first observed log-temperature,
# accumulate the (observed + predicted) log-differences, then exponentiate
log_levels = np.concatenate(([np.log(temp_df.iloc[0])], sta_temp_df))
cum_temp_df = np.exp(np.cumsum(log_levels))
print(cum_temp_df)

我的代码如上所示。如果有人能指出我在这里做错了什么，我将不胜感激！

时间序列 LSTM 模型错误预测

0 个答案: