Python3入门机器学习 - 线性回归与KNN算法处理boston数据集
2018-02-26 本文已影响21人
c6ad47dbfc82
简单线性回归
最小二乘法实现原理
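使用最小二乘法计算a、b的值,实现线性回归的拟合:对于直线 $\hat{y}=ax+b$,$a=\dfrac{\sum_{i}(x^{(i)}-\bar{x})(y^{(i)}-\bar{y})}{\sum_{i}(x^{(i)}-\bar{x})^{2}}$,$b=\bar{y}-a\bar{x}$。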
# _*_ encoding:utf-8 _*_
import numpy as np
class SimpleLinearRegression1:
    """One-feature linear regression fitted by ordinary least squares.

    The slope ``a_`` and intercept ``b_`` are accumulated with an explicit
    Python ``for`` loop, which is easy to follow but comparatively slow.
    """

    def __init__(self):
        # Slope and intercept of the fitted line; both stay None until fit().
        self.a_ = None
        self.b_ = None

    def fit(self, X_train, y_train):
        """Estimate ``a_`` and ``b_`` from one-dimensional training data.

        Args:
            X_train: 1-D sequence of feature values.
            y_train: 1-D sequence of targets, same length as ``X_train``.

        Returns:
            The fitted estimator itself.

        Raises:
            ValueError: if the inputs are not 1-D, are empty, differ in
                length, or if every feature value is identical.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        if X_train.ndim != 1 or y_train.ndim != 1:
            raise ValueError("X_train and y_train must be one-dimensional")
        # zip() would silently truncate to the shorter input while the means
        # below use the full arrays, so a mismatch must be rejected up front.
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must have the same length")
        if len(X_train) == 0:
            raise ValueError("cannot fit on empty training data")

        X_mean = np.mean(X_train)
        y_mean = np.mean(y_train)

        num = 0.0  # sum of (x - mean_x) * (y - mean_y)
        d = 0.0    # sum of (x - mean_x) ** 2
        for x, y in zip(X_train, y_train):
            num += (x - X_mean) * (y - y_mean)
            d += (x - X_mean) ** 2
        if d == 0.0:
            # A constant feature has no defined slope (0 / 0).
            raise ValueError("X_train has zero variance; slope is undefined")

        self.a_ = num / d
        self.b_ = y_mean - self.a_ * X_mean
        return self

    def predict(self, X_test):
        """Return an array with one prediction per element of ``X_test``."""
        return np.array([self._predict(x) for x in X_test])

    def _predict(self, x):
        """Evaluate the fitted line ``a_ * x + b_`` at a single value."""
        return self.a_ * x + self.b_

    def __repr__(self):
        return "SimpleLinearRegression1()"
class SimpleLinearRegression2:
    """One-feature linear regression fitted by ordinary least squares.

    Same model as the loop-based variant, but the sums are computed with
    NumPy dot products, which is considerably faster.
    """

    def __init__(self):
        # Slope and intercept of the fitted line; both stay None until fit().
        self.a_ = None
        self.b_ = None

    def fit(self, X_train, y_train):
        """Estimate ``a_`` and ``b_`` from one-dimensional training data.

        Args:
            X_train: 1-D sequence of feature values.
            y_train: 1-D sequence of targets, same length as ``X_train``.

        Returns:
            The fitted estimator itself.

        Raises:
            ValueError: if the inputs are not 1-D, are empty, differ in
                length, or if every feature value is identical.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        if X_train.ndim != 1 or y_train.ndim != 1:
            raise ValueError("X_train and y_train must be one-dimensional")
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must have the same length")
        if len(X_train) == 0:
            raise ValueError("cannot fit on empty training data")

        X_mean = np.mean(X_train)
        y_mean = np.mean(y_train)
        x_centered = X_train - X_mean

        num = x_centered.dot(y_train - y_mean)  # sum of (x-mean_x)*(y-mean_y)
        d = x_centered.dot(x_centered)          # sum of (x-mean_x)**2
        if d == 0.0:
            # A constant feature has no defined slope (0 / 0).
            raise ValueError("X_train has zero variance; slope is undefined")

        self.a_ = num / d
        self.b_ = y_mean - self.a_ * X_mean
        return self

    def predict(self, X_test):
        """Return an array with one prediction per element of ``X_test``."""
        # Broadcasting evaluates the line for every element in one step.
        return self.a_ * np.asarray(X_test) + self.b_

    def _predict(self, x):
        """Evaluate the fitted line ``a_ * x + b_`` at a single value."""
        return self.a_ * x + self.b_

    def __repr__(self):
        return "SimpleLinearRegression2()"
测试
import numpy as np
from matplotlib import pyplot
# Synthetic data: 100 uniform samples in [0, 1) and targets on the line
# y = 3x + 4 with standard-normal noise added.
x = np.random.random(size=100)
y = 3.0*x+4.0+np.random.normal(size=100)
# IPython magic: run the script in the notebook namespace.
# NOTE(review): presumably this is what defines SimpleLinearRegression1/2 — confirm.
%run MyScripts/SimpleLinearRegression.py
reg1 = SimpleLinearRegression1()
reg2 = SimpleLinearRegression2()
# Time the loop-based fit against the dot-product fit on the same data.
# These timed calls are the only fit() calls before predict() below.
%timeit reg1.fit(x,y)
%timeit reg2.fit(x,y)
# Predictions of both models on the training inputs.
y1 = reg1.predict(x)
y2 = reg2.predict(x)
# Scatter the raw samples, then overlay both fitted lines
# (red, half-transparent for reg1; green for reg2).
pyplot.scatter(x,y)
pyplot.plot(x,y1,color="r",alpha=0.5)
pyplot.plot(x,y2,color='g')
简单线性回归处理boston数据集
仅以boston数据集的第六个特征作为x轴衡量指标
MSE
# Mean squared error: sum of squared prediction errors divided by the
# number of test samples.
mse = np.sum((y_predict-y_test)**2)/len(y_test)
RMSE
# Root mean squared error: square root of `mse`.
# NOTE(review): `sqrt` is not imported in the visible snippets — presumably
# `from math import sqrt`; confirm.
rmse = sqrt(mse)
MAE
# Mean absolute error: sum of absolute prediction errors divided by the
# number of test samples.
mae = np.sum(np.absolute(y_predict-y_test))/len(y_test)
R Square
# R squared: one minus the ratio of the model's mean squared error to the
# variance of the true targets.
# NOTE(review): `mean_squared_error` is presumably sklearn.metrics.mean_squared_error;
# its import is not visible here — confirm.
1-mean_squared_error(y_test,y_predict)/np.var(y_test)
多元线性回归模型
# _*_ encoding:utf-8 _*_
import numpy as np
from sklearn.metrics import r2_score
class LinearRegression:
    """Multiple linear regression solved in closed form (least squares)."""

    def __init__(self):
        # Per-feature coefficients, the intercept, and the full parameter
        # vector [intercept, coef...]; all stay None until fit_normal().
        self.coef_ = None
        self.interception_ = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Fit the model on ``X_train`` / ``y_train`` by least squares.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: array-like of targets with n_samples rows.

        Returns:
            The fitted estimator itself.

        Raises:
            ValueError: if the number of rows in the two inputs differs.
            numpy.linalg.LinAlgError: if the least-squares solver does not
                converge.
        """
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must have the same length")
        # Prepend a column of ones so that theta[0] acts as the intercept.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        # lstsq solves the same normal-equation problem as
        # inv(X_b.T @ X_b) @ X_b.T @ y, but without forming an explicit
        # inverse: it stays well defined when features are collinear
        # (returning the minimum-norm solution) and is numerically stabler.
        self._theta = np.linalg.lstsq(X_b, y_train, rcond=None)[0]
        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def predict(self, X_predict):
        """Return predictions for a 2-D array-like ``X_predict``."""
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Return the R^2 score of the predictions for ``X_test``."""
        return r2_score(y_test, self.predict(X_test))

    def __repr__(self):
        return "LinearRegression()"
KNN算法处理回归问题
# Grid-search the hyper-parameters of a KNN regressor.
knn_reg = KNeighborsRegressor()
params = [
    # Uniform weighting: only the neighbour count (1..10) is searched.
    {
        'weights': ['uniform'],
        'n_neighbors': list(range(1, 11)),
    },
    # Distance weighting: additionally search the `p` parameter (1..5).
    {
        'weights': ['distance'],
        'n_neighbors': list(range(1, 11)),
        'p': list(range(1, 6)),
    },
]
grid_search = GridSearchCV(knn_reg, params, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# The values below are the notebook outputs recorded in the original post.
grid_search.best_params_
# Output: {'n_neighbors': 5, 'p': 1, 'weights': 'distance'}
grid_search.best_score_
# Output: 0.634093080186858
grid_search.best_estimator_.score(X_test, y_test)
# Output: 0.7044357727037996