数据蛙数据分析每周作业

常用线性回归算法类库简介

2019-05-23  本文已影响0人  小T数据站

常用线性回归算法类库简介:

此部分参考文章:scikit-learn(sklearn)线性回归算法类库介绍

数据集来源于一个循环发电厂,共有9568个样本数据,每个数据有5列,分别是AT(环境温度),V(排气真空度),AP(大气压力),RH(相对湿度)和PE(输出电力)。

import numpy as np
import pandas as pd
# Load the combined-cycle power plant dataset: AT, V, AP, RH are the
# features; PE (net electrical output) is the regression target.
data = pd.read_excel('Folds5x2_pp.xlsx')
# In a plain script (unlike a notebook cell) bare expressions are
# discarded, so print the sanity checks explicitly.
print(data.head())
print(data.shape)
X = data[['AT','V','AP','RH']]
Y = data[['PE']]
# Hold out 30% of the rows as a test set; random_state pins the split
# so the reported metrics are reproducible.
from sklearn.model_selection import train_test_split
train_X,test_X,train_Y,test_Y = train_test_split(X,Y,test_size = 0.3,random_state = 1)
# Fit an ordinary least-squares linear regression on the training set.
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(train_X,train_Y)
# Print the fitted intercept and coefficients. Because train_Y is a
# one-column DataFrame (not a Series), both come back as 2-D arrays.
print(linreg.intercept_)
print(linreg.coef_)
# Evaluate the model on the held-out test set with MSE / RMSE.
from sklearn import metrics
test_Y_pred = linreg.predict(test_X)
# Compute the MSE once and derive RMSE from it, instead of scoring the
# predictions twice.
test_mse = metrics.mean_squared_error(test_Y,test_Y_pred)
print("测试集MSE:",test_mse)
print("测试集RMSE:",np.sqrt(test_mse))
# 10-fold cross-validation over the full dataset gives an error
# estimate that is less dependent on one particular train/test split.
from sklearn.model_selection import cross_val_predict
Y_pred = cross_val_predict(linreg,X,Y,cv = 10)
# Compute the MSE once and derive RMSE from it.
cv_mse = metrics.mean_squared_error(Y,Y_pred)
print("10折交叉验证MSE:",cv_mse)
print("10折交叉验证RMSE:",np.sqrt(cv_mse))
# --- Regularized variants ---

# L2 regularization (Ridge). RidgeCV is given a list of candidate
# alpha values and picks the best one via 5-fold cross-validation.
from sklearn.linear_model import RidgeCV
ridgecv = RidgeCV(alphas = [0.01,0.1,0.5,1,3,5,7,10,20,100],cv = 5)
# Fit on the training set
ridgecv.fit(train_X,train_Y)
# The alpha selected by cross-validation
print("最优的alpha值:",ridgecv.alpha_)
# Fitted intercept and coefficients
print(ridgecv.intercept_)
print(ridgecv.coef_)
# Evaluate on the test set; compute MSE once and reuse it for RMSE.
test_Y_pred = ridgecv.predict(test_X)
ridge_mse = metrics.mean_squared_error(test_Y,test_Y_pred)
print("测试集MSE:",ridge_mse)
print("测试集RMSE:",np.sqrt(ridge_mse))
# L1 regularization (Lasso), solved by coordinate descent.
# LassoCV selects the best alpha from the candidates via 5-fold CV.
from sklearn.linear_model import LassoCV
lassocv = LassoCV(alphas = [0.01,0.1,0.5,1,3,5,7,10,20,100],cv = 5)
# LassoCV expects a 1-D target array, hence .values.ravel()
lassocv.fit(train_X,train_Y.values.ravel())
print("最优的α值:",lassocv.alpha_)
print(lassocv.intercept_)
print(lassocv.coef_)
# Evaluate on the test set; compute MSE once and reuse it for RMSE.
test_Y_pred = lassocv.predict(test_X)
lasso_mse = metrics.mean_squared_error(test_Y,test_Y_pred)
print("测试集MSE:",lasso_mse)
print("测试集RMSE:",np.sqrt(lasso_mse))
# L1 regularization solved by Least Angle Regression (LARS) — an
# alternative solver to coordinate descent for the same Lasso model.
from sklearn.linear_model import LassoLarsCV
lassoLarscv = LassoLarsCV(cv = 5)
lassoLarscv.fit(train_X,train_Y.values.ravel())
# Print the CV-selected alpha too, matching the other CV models above.
print("最优的alpha值:",lassoLarscv.alpha_)
print(lassoLarscv.intercept_)
print(lassoLarscv.coef_)
# Evaluate on the test set; compute MSE once and reuse it for RMSE.
test_Y_pred = lassoLarscv.predict(test_X)
lars_mse = metrics.mean_squared_error(test_Y,test_Y_pred)
print("测试集MSE:",lars_mse)
print("测试集RMSE:",np.sqrt(lars_mse))
# ElasticNet blends the L1 and L2 penalties: alpha sets the overall
# strength and l1_ratio the L1 share (defaults 1.0 and 0.5).
from sklearn.linear_model import ElasticNet
elasticNet = ElasticNet(alpha = 1.0,l1_ratio = 0.5)
elasticNet.fit(train_X,train_Y)
# Evaluate on the test set; compute MSE once and reuse it for RMSE.
test_Y_pred = elasticNet.predict(test_X)
enet_mse = metrics.mean_squared_error(test_Y,test_Y_pred)
print("测试集MSE:",enet_mse)
print("测试集RMSE:",np.sqrt(enet_mse))
## 使用ElasticNetCV通过交叉验证自动选择最优的α值
# ElasticNetCV: fix l1_ratio and let 5-fold CV pick the best alpha
# from the candidate list.
from sklearn.linear_model import ElasticNetCV
elasticNetCV = ElasticNetCV(l1_ratio = 0.7,alphas = [0.01,0.1,0.5,1,3,5,7,10,20,100],cv = 5)
# ElasticNetCV expects a 1-D target array, hence .values.ravel()
elasticNetCV.fit(train_X,train_Y.values.ravel())
print("最优的alpha值:",elasticNetCV.alpha_)
print(elasticNetCV.intercept_)
print(elasticNetCV.coef_)
# Evaluate on the test set; compute MSE once and reuse it for RMSE.
test_Y_pred = elasticNetCV.predict(test_X)
enetcv_mse = metrics.mean_squared_error(test_Y,test_Y_pred)
print("测试集MSE:",enetcv_mse)
print("测试集RMSE:",np.sqrt(enetcv_mse))
上一篇 下一篇

猜你喜欢

热点阅读