2018-10-20
2018-10-20 本文已影响2人
辘轳鹿鹿
Python与数据挖掘(二)——逻辑回归
2、算法实现
import pandas as pd
# NOTE: RandomizedLogisticRegression (spelling corrected here) was deprecated
# in scikit-learn 0.19 and removed in 0.21; this import needs an older release.
from sklearn.linear_model import LogisticRegression, RandomizedLogisticRegression

try:
    # Current home of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location, removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split

# Read the CSV file, stating the encoding explicitly.
# NOTE(review): the path is relative as written; it may have been meant as
# 'E:/Python/LogisticRegression.csv' -- confirm before running.
data = pd.read_csv('E/Python/LogisticRegression.csv', encoding='utf-8')

# One-hot encode the categorical `rank` column into 0/1 indicator columns
# prefixed with "rank"; drop_first=True omits the first level's column.
data_dum = pd.get_dummies(data, prefix='rank', columns=['rank'], drop_first=True)

# Split into training and test sets: column 0 is used as the target and all
# remaining columns as features. `.iloc` replaces the removed `.ix` indexer
# and selects by position, which is what the integer slices here rely on.
X_train, X_test, y_train, y_test = train_test_split(
    data_dum.iloc[:, 1:],
    data_dum.iloc[:, 0],
    test_size=.1,
    random_state=520,
)

# Fit a logistic regression with default hyper-parameters on the training set.
lr = LogisticRegression()
lr.fit(X_train, y_train)