2020-07-17 暑期学习日更计划 (李宏毅2020-hw1)

2020-07-16  本文已影响0人  Reza_

ML2020spring - hw1

该作业kaggle地址:ML2020spring - hw1 Regression - PM2.5 Prediction

PyTorch入门系列:把之前用NumPy实现的神经网络部分改成了PyTorch,数据预处理的方法没有改。

数据预处理部分

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the training table using the Big5 codec.
data = pd.read_csv('./lhy_DL_Hw/train.csv', encoding='big5')
# Keep everything from the 4th column onward. The explicit copy makes the
# masked assignment below operate on an independent frame rather than on a
# slice of the frame returned by read_csv (avoids chained-assignment warnings).
data = data.iloc[:, 3:].copy()
# Cells holding the marker string 'NR' are replaced by 0.
data[data == 'NR'] = 0
raw_data = data.to_numpy()

# For every month, lay its 20 day-blocks (18 rows x 24 columns each in
# raw_data) side by side, giving one float array of shape (18, 480) per month.
month_data = {}
for month in range(12):
    # The 20 * 18 consecutive rows that belong to this month.
    month_rows = raw_data[18 * 20 * month: 18 * 20 * (month + 1), :]
    # Axes after reshape: (day, row-within-day, column-within-day).
    per_day = month_rows.reshape(20, 18, 24)
    # Move the row axis to the front, then merge (day, column) into 480 columns.
    month_data[month] = per_day.transpose(1, 0, 2).reshape(18, 480).astype(float)

# Sliding-window samples: each month's 480 columns give 480 - 9 = 471 windows
# of 9 consecutive columns, each paired with the column right after the window.
x = np.empty([12 * 471, 18 * 9], dtype=float)
y = np.empty([12 * 471, 1], dtype=float)
for month in range(12):
    hours = month_data[month]
    for start in range(471):
        row = month * 471 + start
        # Flatten the 18 x 9 window starting at column `start` into one row of x.
        x[row, :] = hours[:, start:start + 9].reshape(-1)
        # Label: row index 9 of month_data (the PM2.5 value, per the original
        # author's note), taken from the column just after the window.
        y[row, 0] = hours[9, start + 9]

# Column-wise statistics of the features and labels.
mean_x = np.mean(x, axis=0)  # one entry per column of x (18 * 9)
std_x = np.std(x, axis=0)  # one entry per column of x (18 * 9)
mean_y = np.mean(y, axis=0)
std_y = np.std(y, axis=0)

# Z-score standardization of x, in place. Columns whose standard deviation is
# zero are left unchanged to avoid dividing by zero. A single vectorized
# expression replaces the element-by-element Python loop; the arithmetic per
# element is the same.
nonconstant = std_x != 0
x[:, nonconstant] = (x[:, nonconstant] - mean_x[nonconstant]) / std_x[nonconstant]

搭建网络部分

# Wrap the NumPy arrays as tensors and convert them to 32-bit floats.
x_train = torch.from_numpy(x).to(torch.float32)
y_train = torch.from_numpy(y).to(torch.float32)
print(x_train.shape, y_train.shape)


# Hyper-parameters.
learning_rate = 0.03
input_size = 162  # width of one input row passed to the linear layer
output_size = 1
num_epoches = 4000
batch_size = 1  # NOTE(review): not referenced by the code visible in this file

# A single linear layer is the whole model.
model = nn.Linear(in_features=input_size, out_features=output_size)

# Mean-squared-error criterion and Adam optimizer over the layer's parameters.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Collects one loss value per training step.
loss_list = []

开始训练

# Full-batch training: every step feeds the entire x_train through the model.
for epoch in range(num_epoches):
    optimizer.zero_grad()
    outputs = model(x_train)
    # Square root of the MSE criterion, i.e. the optimized quantity is RMSE.
    loss = torch.sqrt(criterion(outputs, y_train))
    loss.backward()
    optimizer.step()

    current_loss = loss.item()
    loss_list.append(current_loss)

    # Progress report every 100 epochs.
    if epoch % 100 == 0:
        print(epoch, current_loss)

# Plot the recorded loss against the epoch index.
plt.plot(range(num_epoches), loss_list[:num_epoches])
plt.show()
print("Finsh Training")

开始测试:

# Read the test table (no header row) using the Big5 codec.
testdata = pd.read_csv('./lhy_DL_Hw/test.csv', header=None, encoding='big5')
# Keep everything from the 3rd column onward. The explicit copy makes the
# masked assignment below operate on an independent frame rather than on a
# slice of `testdata` (avoids chained-assignment warnings).
test_data = testdata.iloc[:, 2:].copy()
# Cells holding the marker string 'NR' are replaced by 0.
test_data[test_data == 'NR'] = 0
test_data = test_data.to_numpy()

# Each of the 240 test samples occupies 18 consecutive rows; flatten every
# such group into one row of test_x.
test_x = np.empty([240, 18 * 9], dtype=float)
for i in range(240):
    test_x[i, :] = test_data[18 * i: 18 * (i + 1), :].reshape(-1)

# Standardize with the statistics computed on the training features
# (mean_x / std_x); columns whose training std is zero are left unchanged.
usable_cols = std_x != 0
test_x[:, usable_cols] = (test_x[:, usable_cols] - mean_x[usable_cols]) / std_x[usable_cols]

test_x = torch.from_numpy(test_x).float()
# Pure inference: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    test_outputs = model(test_x)

把测试集中得到的值写入到csv文件中

import csv

# Write the predictions to submit.csv: a header line followed by one
# (id, value) line for each of the 240 test samples.
with open('submit.csv', mode='w', newline='') as submit_file:
    csv_writer = csv.writer(submit_file)
    header = ['id', 'value']
    csv_writer.writerow(header)
    csv_writer.writerows(
        ['id_' + str(i), test_outputs[i][0].item()] for i in range(240)
    )

线性回归模型实现!

上一篇 下一篇

猜你喜欢

热点阅读