为什么我的机器学习多项式回归程序不起作用?

时间:2021-03-12 07:42:44

标签: python machine-learning scikit-learn polynomial-approximations

我正在编写一个程序,该程序将生成给定温度数据的多项式模型。您可以在下面的屏幕截图中看到我的程序以及我的数据:

from google.colab import drive
import os as os             # for moving around the operating system
import matplotlib.pyplot as plt
import numpy as np
import pickle
import calendar

drive.mount("/content/drive", force_remount=True)
fileName = "/content/drive/My Drive/11th Grade/Independent Study/MBCP2018-2019_iButton1.csv"

## remove excess lines of header data
rawData = open(fileName, 'r');
data = rawData.readlines();
rawData.close()

data = data[15:len(data)];

# ----------------------------------------------------------------------------------------------------------- #
dateTime = [];
unit = [];
T = [];


unit = [];
T = []; # temperature
year = [];
month = [];
day = [];           # convert the day into a decimal day value
hour = [];
minute = [];
second = [];
td = []; # Decimal value of year


## use this for converting hours minutes seconds to days https://matplotlib.org/api/dates_api.html

i = 0;

while i < len(data):
    
    dateTimeTemp, unitTemp, T_Temp   = data[i].split(',');
    monthDayYear, time_Temp, AMPM_Temp = dateTimeTemp.split(' ');
    month_Temp,day_Temp,year_Temp = monthDayYear.split('/');
    hour_Temp,minute_Temp,second_Temp = time_Temp.split(':');

    ## clean up the strings to be processed by np.datetime64 - a better solution has got to exist in python...
    if int(month_Temp) < 10:
        month_Temp = '0' + month_Temp;
    if int(day_Temp) < 10:
        day_Temp = '0' + day_Temp;
    if (int(hour_Temp) < 10) & (int(hour_Temp) != 0):
        hour_Temp = '0' + hour_Temp;


    # convert the clock to 24hr
    if AMPM_Temp == 'PM':
        if int(hour_Temp) == 12:
            hour_Temp = '12';
        else:
            hour_Temp = str(int(hour_Temp)+12);
    elif int(hour_Temp) == 12:      ## when it's in the mo'nin' and clock should read 00:00
        hour_Temp = '00';

    dateTime.append(dateTimeTemp);
    year.append(int('20'+year_Temp));
    month.append(int(month_Temp));
    day.append(int(day_Temp));
    hour.append(int(hour_Temp));
    minute.append(int(minute_Temp));
    second.append(int(second_Temp));
    
    ## time difference from the beginning of the year.
    td_Temp = np.datetime64('20' + year_Temp + '-' + month_Temp + '-' + day_Temp + 'T' + hour_Temp + ':' + minute_Temp + ':' + second_Temp)- np.datetime64('20' + year_Temp + '-01-01T00:00:00');
    td_Temp = td_Temp / np.timedelta64(1,'s'); ## converts this into a decimal seconds value
    
    # converts this into a decimal year value.
    if calendar.isleap(year[i]):
        td_Temp = td_Temp/86400/366;
    else:
        td_Temp = td_Temp/86400/365;

    td.append(td_Temp);
    unit.append(unitTemp);
    T.append(float(T_Temp));

    i = i + 1;

np.asarray(td);
np.asarray(T);
np.asarray(year);
td = np.add(td,year);

plt.plot(td,T)
plt.xlabel('Decimal year')
plt.ylabel('Temperature (C)')
plt.xlim(left=2018.5464,right=2019.5395)

# ------------------------------------------------------------------------------------- #

# Machine Learning Stuff! #

# SPLICING WORK #
td = np.round(td,4)

# only have to change these two parameters, but make sure these values exist in the csv file
leftBound = 2018.5464
rightBound = 2019.5395

# returns as a tuple
xLowerBound = np.where(td == leftBound)
xUpperBound = np.where(td == rightBound)

# splicing the array
td = td[xLowerBound[0][0]:xUpperBound[0][0]]
T = T[xLowerBound[0][0]:xUpperBound[0][0]]

# MACHINE LEARNING ALGORITHM #
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X1,X2,Y1,Y2 = train_test_split(td, T, random_state=0, train_size=0.5)
X1 = X1.reshape(-1,1)
X2 = X2.reshape(-1,1)

print("X1: " + str(X1))
print("X2: " + str(X2))
print("Y1: " + str(Y1))
print("Y2: " + str(Y2))

degree=2
polyreg=make_pipeline(PolynomialFeatures(degree),LinearRegression())
polyreg.fit(X1,Y1)

plt.figure(1)
plt.scatter(X1,Y1)
plt.plot(X2,polyreg.predict(X2),color="black")
plt.title("Polynomial regression with degree "+str(degree))
plt.show()

然而,当我生成模型并改变模型的度数时,我得到了这些非常潦草、时髦的图表。问题是什么?我真的很感激有一个解决方案,因为我已经被这个问题困扰了一个星期。

Data

Degree 1

Degree 2

0 个答案:

没有答案