机器学习之LightGBM算法案例

2021-06-15  本文已影响0人  米陽

#!/usr/bin/python3

# -*- coding: utf-8 -*-

"""

@Time    : 2021/6/15 7:52

@Author  : Loden

@FileName: customer_default_demo.py

@Software: PyCharm

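@desc    : LightGBM customer-default classification demo: trains an LGBMClassifier, reports accuracy and ROC AUC, lists feature importances, and tunes hyperparameters with GridSearchCV.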

"""

import matplotlib.pyplot as plt
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Load the customer-information / default-outcome workbook into a DataFrame.
df = pd.read_excel(
    r'D:\python_data\MuffetTrader\data_file\客户信息及违约表现.xlsx'
)

# Target is the '是否违约' column; every remaining column is used as a feature.
Y = df['是否违约']
X = df.drop(columns='是否违约')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

# Build and train a LightGBM classifier with the library's default settings.
model = LGBMClassifier()
model.fit(X_train, Y_train)

# Predict class labels for the held-out test rows.
y_pred = model.predict(X_test)
print(y_pred)

# Side-by-side table of predicted vs. actual labels. Both columns are copied
# into plain lists so they line up by position instead of by Y_test's index.
a = pd.DataFrame({'预测值': list(y_pred), '实际值': list(Y_test)})
print(a.head(10))

# Overall prediction accuracy. accuracy_score is documented as
# accuracy_score(y_true, y_pred), so the true labels go first.
score = accuracy_score(Y_test, y_pred)
print(score)

# Same metric obtained through the estimator's own score() method.
print(model.score(X_test, Y_test))

# Per-class probabilities for the test rows; column index 1 is the score that
# is fed to both the ROC curve and the AUC computation below.
y_pred_proba = model.predict_proba(X_test)
positive_scores = y_pred_proba[:, 1]

# ROC curve points (false-positive rate, true-positive rate, thresholds).
fpr, tpr, thres = roc_curve(Y_test, positive_scores)
plt.plot(fpr, tpr)
# The curve is only drawn here, not displayed; call plt.show() to view it.

# Area under the ROC curve for the model.
score = roc_auc_score(Y_test.values, positive_scores)
print(score)

# Rank the features by the importance the trained model assigned to them.
features = X.columns
importances = model.feature_importances_

importances_df = pd.DataFrame({'特征名称': features, '特征重要性': importances})

# sort_values returns a new DataFrame by default, so the result has to be
# assigned back; otherwise the table is printed in its original column order.
importances_df = importances_df.sort_values('特征重要性', ascending=False)
print(importances_df)

# Hyperparameter tuning: exhaustive grid search over three LightGBM settings,
# scored by ROC AUC with 5-fold cross-validation on the training set.
parameters = {
    'num_leaves': [10, 15, 31],
    'n_estimators': [10, 20, 30],
    'learning_rate': [0.05, 0.1, 0.2],
}
model = LGBMClassifier()
grid_search = GridSearchCV(model, parameters, scoring='roc_auc', cv=5)
grid_search.fit(X_train, Y_train)

print('最优参数:')
print(grid_search.best_params_)

# Rebuild the model from the parameters the search actually selected, rather
# than hard-coding one combination that may not match the search result.
model = LGBMClassifier(**grid_search.best_params_)
model.fit(X_train, Y_train)

# Evaluate the tuned model on the held-out test set using ROC AUC.
y_pred_proba = model.predict_proba(X_test)
score = roc_auc_score(Y_test, y_pred_proba[:, 1])
print(score)

上一篇下一篇

猜你喜欢

热点阅读