python:使用Xgboost进行回归

2022-03-10  本文已影响0人  可能性之兽

主要是xgboost的回归
scikit-learn: machine learning in Python — scikit-learn 1.0.2 documentation
XGBoost Documentation — xgboost 1.5.2 documentation

视频

https://www.youtube.com/watch?v=OtD8wVaFm6E
XGBoost in Python from Start to Finish - YouTube

如何对回归结果进行评价

https://juejin.cn/post/6997563709157539847

针对单个指标使用的情况

image.png

针对多个指标配合使用的情况

image.png
!pip install xgboost

!pwd

一,分类问题

# --- Load the iris dataset and split it into train/test sets ---
from sklearn.datasets import load_iris
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from xgboost import plot_importance
from matplotlib import pyplot as plt

iris = load_iris()

# BUG FIX: the original read `x,y=-iris.data,iris.target`, which negated every
# feature value before training; the features must be used unmodified.
x, y = iris.data, iris.target

# Hold out 20% of the samples as a test set (fixed seed for reproducibility).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Training parameters for the native xgb.train API (3-class softmax on iris).
params = {
    'booster': 'gbtree',
    'objective': 'multi:softmax',   # predict() returns class labels directly
    'gamma': 0.1,
    'max_depth': 5,
    'lambda': 10,                   # L2 regularisation weight
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': 3,
    'eta': 0.1,                     # learning rate
    'seed': 1000,
    'nthread': 4,
    'num_class': 3,
    'verbosity': 0,
}

# xgb.train accepts the params dict directly; the original's conversion to a
# list of (key, value) pairs (`plst`) was unnecessary.
dtrain = xgb.DMatrix(x_train, label=y_train)
num_rounds = 10
model = xgb.train(params, dtrain, num_rounds)

# Predict class labels for the held-out samples.
dtest = xgb.DMatrix(x_test)
ans = model.predict(dtest)

# Use the already-imported sklearn accuracy_score instead of the original
# hand-rolled counting loop — same fraction-correct value, one line.
print("Accuracy:", accuracy_score(y_test, ans))

# Visualise per-feature importance of the fitted booster.
plot_importance(model)
plt.show()

二,xgboost导入数据的方法


# --- Three ways of feeding data into xgb.DMatrix ---
import xgboost as xgb
import numpy as np
import scipy
import scipy.sparse  # FIX: `import scipy` alone does not guarantee the sparse subpackage is loaded
import pandas

# 1) From a dense NumPy array plus a label vector.
data = np.random.randn(100, 10)
label = np.random.randint(2, size=100)
dtrain = xgb.DMatrix(data, label=label)

# 2) From a SciPy sparse matrix.
# BUG FIX: the original passed an explicit shape of (100, 2) for 100x10 dense
# data, which raises a ValueError; let csr_matrix infer the shape itself.
scr = scipy.sparse.csr_matrix(data)
dtrain = xgb.DMatrix(scr)

# 3) From pandas DataFrames for both features and labels.
data = pandas.DataFrame(np.arange(12).reshape((4, 3)), columns=['a', 'b', 'c'])
label = pandas.DataFrame(np.random.randint(2, size=4))
dtrain = xgb.DMatrix(data, label=label)

三, xgboost回归问题

# --- Regression demo: XGBRegressor on the Boston housing data ---
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2; this cell only runs against an older sklearn — confirm the environment.
from sklearn import datasets

housing = datasets.load_boston()
features = housing.data[:, 0:2]  # keep only the first two feature columns
target = housing.target

import xgboost as xgb
from xgboost import plot_importance
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=0)

# Notebook-style sanity checks on the training matrix.
x_train.shape
x_train.ndim
len(x_train)

# Fit the sklearn-style wrapper, then score the held-out features.
model = xgb.XGBRegressor(max_depth=3, n_estimators=100, learning_rate=0.1)
model.fit(x_train, y_train)
x_predicted = model.predict(x_test)
x_predicted

# Plot which features the fitted model considered important.
plot_importance(model)
plt.show()

案例实战

# Notebook check: list the files in the current working directory.
import os

cwd = os.getcwd()
os.listdir(cwd)

https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls

# Load the UCI concrete compressive-strength dataset.
# Requires Concrete_Data.xls in the working directory (downloadable from the
# UCI link above).
# BUG FIX: the original called `pd.read_excel` but the alias `pd` was never
# defined anywhere in this file (only `import pandas`); bind it here.
import pandas as pd

data = pd.read_excel('Concrete_Data.xls')
data.head()

# Shorten the unwieldy target column name to 'label'.
# (The trailing space in the original column name is real — keep it.)
data.rename(columns={"Concrete compressive strength(MPa, megapascals) ": 'label'},
            inplace=True)
data.shape
data.columns

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# 80/20 split: features are every column except the target.
x_train, x_test, y_train, y_test = train_test_split(
    data.drop(['label'], axis=1), data['label'], test_size=0.2, random_state=0)

# Wrap the splits as DMatrix objects for the native training API.
xgb_train = xgb.DMatrix(x_train, label=y_train)
xgb_test = xgb.DMatrix(x_test, label=y_test)

# Training configuration for squared-error regression.
# FIX: 'reg:linear' is a deprecated alias removed in newer XGBoost releases;
# 'reg:squarederror' is the same objective under its current name.
params = {
    'booster': 'gbtree',
    'objective': 'reg:squarederror',
    'gamma': 0.1,
    'max_depth': 6,
    'lambda': 10,       # L2 regularisation weight
    'subsample': 0.8,
    'eta': 0.1,         # learning rate
}

num_rounds = 100
# Report train/test RMSE after every boosting round.
watchlist = [(xgb_train, 'train'), (xgb_test, 'test')]
model = xgb.train(params, xgb_train, num_rounds, evals=watchlist)

# Persist the fitted booster so it can be reloaded later.
model.save_model('0309testxgb.model')

Load the saved model

# Reload the saved booster from disk and score the held-out features.
model = xgb.Booster(model_file='0309testxgb.model')

dtest = xgb.DMatrix(x_test)
x_predicted = model.predict(dtest)
print(x_predicted)

上一篇 下一篇

猜你喜欢

热点阅读