Question

我想预测未来数据的行为。我的数据 x 和 y 各有约 1000 个值，我想预测下一个值 y[1001]。下面是我的示例代码。

from numpy.random import randn
from numpy.random import seed
from numpy import sqrt
import numpy
from numpy import sum as arraysum
from scipy.stats import linregress
from matplotlib import pyplot

# Build a synthetic linear dataset, fit a simple linear regression with
# scipy.stats.linregress, and try to report/plot an interval around one point.
# NOTE: as written this script stops with IndexError at `x_in = x[1001]`,
# because x, y and yhat each hold 1000 elements (valid indices 0..999).
seed(1)  # seed numpy's global random generator so the data is reproducible
x = 20 * randn(1000) + 100  # 1000 random inputs
print(numpy.size(x))
y = x + (10 * randn(1000) + 50)  # 1000 outputs: x plus a random offset
print(numpy.size(y))
# fit linear regression model
# linregress returns the slope first, then the intercept: b1 = slope, b0 = intercept
b1, b0, r_value, p_value, std_err = linregress(x, y)
# make predictions
yhat = b0 + b1 * x  # same length as x (1000 fitted values)
# define new input, expected value and prediction
x_in = x[1001]  # raises IndexError: index 1001 is out of bounds for size 1000
y_out = y[1001]  # would fail the same way (y has 1000 elements)
yhat_out = yhat[1001]  # would fail the same way (yhat has 1000 elements)
# estimate stdev of yhat
sum_errs = arraysum((y - yhat)**2)  # sum of squared residuals
stdev = sqrt(1/(len(y)-2) * sum_errs)  # divides by n - 2 rather than n
# calculate prediction interval
interval = 1.96 * stdev  # half-width; the message below presents it as a 95% interval
print('Prediction Interval: %.3f' % interval)
# NOTE(review): the interval is centred on the observed y_out, while the value
# printed as 'True value' is the prediction yhat_out -- going by the variable
# names these two look swapped; confirm the intent.
lower, upper = y_out - interval, y_out + interval
print('95%% likelihood that the true value is between %.3f and %.3f' % (lower, upper))
print('True value: %.3f' % yhat_out)
# plot dataset and prediction with interval
pyplot.scatter(x, y)
pyplot.plot(x, yhat, color='red')  # fitted line
pyplot.errorbar(x_in, yhat_out, yerr=interval, color='black', fmt='o')  # prediction +/- interval
pyplot.show()

当我尝试这样做时，它给了我这个错误。

     x_in = x[1001]
IndexError: index 1001 is out of bounds for axis 0 with size 1000

我的目标是预测未来数据的行为，并通过绘制误差线来评估预测结果。我看到了这个示例“How do you create a linear regression forecast on time series data in Python”，但不知道如何将其应用到我的数据上。我还了解到可以使用 ARIMA 模型。请问我该怎么做？

Answer 1

x = 20 * randn(1000) + 100

^ 在这里，您创建的输入向量 x 只有 1000 个值。

y = x + (10 * randn(1000) + 50)

^ 在这里，您创建的输出向量 y 同样只有 1000 个值。

因此，当您执行 x_in = x[1001] 时，引用的是输入向量中不存在的元素：该向量只有 1000 个元素，有效索引为 0 到 999。

一个快速解决方法是

# Draw 1001 samples so that index 1000 exists: the first 1000 points are used
# for fitting and the last one is held out as the "new" observation.
seed(1)
x = 20 * randn(1001) + 100
print(numpy.size(x))
y = x + (10 * randn(1001) + 50)
print(numpy.size(y))
# fit linear regression model on the first 1000 points only
# (linregress returns the slope first, then the intercept)
b1, b0, r_value, p_value, std_err = linregress(x[:1000], y[:1000])
# make predictions for every x, including the held-out point
yhat = b0 + b1 * x
# define new input, expected value and prediction
# (indices are 0-based, so the 1001st element lives at index 1000)
x_in = x[1000]
y_out = y[1000]
# the prediction must use the same index; yhat[1001] would still raise IndexError
yhat_out = yhat[1000]

Answer 2

这是一个图形化多项式拟合器的代码：它使用 numpy.polyfit() 执行拟合，并使用 numpy.polyval() 计算预测值，这里拟合的是一阶多项式。您可以通过更改代码顶部的 “polynomialOrder = 1” 一行来尝试不同的多项式阶数。

import numpy, matplotlib
import matplotlib.pyplot as plt

# Sample observations to fit; x and y values are paired element-wise.
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])

# Degree of the polynomial to fit; 1 gives a straight line.
polynomialOrder = 1

# Least-squares polynomial fit (coefficients come back highest power first).
fittedParameters = numpy.polyfit(xData, yData, deg=polynomialOrder)
print('Fitted Parameters:', fittedParameters)

# Evaluate the fitted model at the observed x values and take the residuals.
modelPredictions = numpy.polyval(fittedParameters, xData)
absError = modelPredictions - yData

# Goodness-of-fit statistics derived from the residuals.
SE = numpy.square(absError)   # squared error of each point
MSE = SE.mean()               # mean squared error
RMSE = numpy.sqrt(MSE)        # root mean squared error
Rsquared = 1.0 - absError.var() / yData.var()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

# blank line separating the statistics from any later output
print()


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data as diamond markers with the fitted curve on top.

    graphWidth and graphHeight give the figure size in pixels; the figure is
    created at 100 dpi, so each is divided by 100 to obtain inches.  The
    function reads the module-level xData, yData and fittedParameters.
    """
    sizeInInches = (graphWidth/100.0, graphHeight/100.0)
    figure = plt.figure(figsize=sizeInInches, dpi=100)
    axes = figure.add_subplot(111)

    # raw observations, drawn with the 'D' (diamond) marker
    axes.plot(xData, yData, 'D')

    # evaluate the fitted polynomial across the observed x range
    # (linspace's default number of samples is used)
    xLow, xHigh = min(xData), max(xData)
    xModel = numpy.linspace(xLow, xHigh)
    axes.plot(xModel, numpy.polyval(fittedParameters, xModel))

    # axis labels
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # close every pyplot figure afterwards

# Figure size in pixels, passed straight to the plotting function.
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)

如何通过线性回归预测值？

2 个答案: