风控模型6大核心指标

2020-11-27  本文已影响0人  overad
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 27 15:31:06 2020
"""

import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

n_sample = 100000

# Synthetic scoring sample: one row per user with a random 0/1 label, a score
# drawn uniformly from [0, 900) and a term code in 20201..20205.  The keyword
# arguments are evaluated left to right, so the random draws happen in the
# order label -> score -> term.
df_score = pd.DataFrame({'user_id': list(range(n_sample))}).assign(
    label=np.random.randint(2, size=n_sample),
    score=900 * np.random.random(size=n_sample),
    term=20201 + np.random.randint(5, size=n_sample),
)

# Per-term overview: row count, number of label==1 rows and their share.
# The result is not stored; it is only shown when run interactively.
df_score.groupby('term')['label'].agg(total='count', bad='sum', bad_rate='mean')


#KS,GINI,AUC

from sklearn.metrics import roc_auc_score,roc_curve

def get_auc(ytrue, yprob):
    """Return the ROC AUC of ``yprob`` against ``ytrue``, folded to be >= 0.5.

    A raw AUC below 0.5 is mirrored to ``1 - auc``, so the result does not
    depend on whether a high score marks the positive or the negative class.
    """
    raw_auc = roc_auc_score(ytrue, yprob)
    # max() keeps raw_auc when it is >= 0.5 and its mirror image otherwise.
    return max(raw_auc, 1 - raw_auc)

def get_ks(ytrue, yprob):
    """Return the KS statistic: the largest absolute gap between TPR and FPR.

    The gap is taken over every threshold returned by ``roc_curve``.
    """
    fpr, tpr, _thresholds = roc_curve(ytrue, yprob)
    separation = abs(tpr - fpr)
    return separation.max()

def get_gini(ytrue, yprob):
    """Return the Gini coefficient, computed as ``2 * AUC - 1``.

    The AUC comes from ``get_auc``, so it is already folded to be >= 0.5.
    """
    return 2 * get_auc(ytrue, yprob) - 1

# One row per term, one column per discrimination metric (auc, ks, gini).
_by_term = df_score.groupby('term')
_metric_fns = {'auc': get_auc, 'ks': get_ks, 'gini': get_gini}
df_metrics = pd.DataFrame({
    metric: _by_term.apply(lambda grp: fn(grp['label'], grp['score']))
    for metric, fn in _metric_fns.items()
})

# PSI (population stability index) between consecutive terms.
# Bucket the scores into fixed bins; each interval is open on the left and
# closed on the right (pd.cut default).
df_score['score_bin'] = pd.cut(df_score['score'],[0,500,700,800,900])

# Head-count per (score bin, term).  margins=True adds an 'All' row and an
# 'All' column holding the totals.
df_total = pd.pivot_table(df_score,
                          values='user_id',
                          index='score_bin',
                          columns=['term'],
                          aggfunc="count",
                          margins=True)

# Share of each bin within its column: divide by the last ('All') row.
df_ratio = df_total.div(df_total.iloc[-1,:],axis=1)

# eps keeps log() and the division finite when a bin is empty.
eps = np.finfo(np.float32).eps
lst_psi = list()
# Compare every term column with the one before it; the first term has no
# predecessor and the trailing 'All' column is not a term, so both are skipped.
for idx in range(1,len(df_ratio.columns)-1):
    # Use the bin rows only (drop the trailing 'All' row).  A bin with no
    # rows for a term is treated as a share of zero before eps is added.
    last = df_ratio.iloc[:-1,idx-1].fillna(0) + eps
    cur = df_ratio.iloc[:-1,idx].fillna(0) + eps
    psi = ((cur-last) * np.log(cur/last)).sum()
    lst_psi.append(psi)

# Ratio table with an extra 'psi' row; NaN where PSI is undefined (first term
# and the 'All' column).  DataFrame.append no longer exists in pandas >= 2.0,
# so the row is attached with pd.concat.  The result is kept under its own
# name so that df_ratio stays unchanged.
psi_row = pd.Series([np.nan]+lst_psi+[np.nan],
                    index=df_ratio.columns,
                    name='psi')
df_psi = pd.concat([df_ratio, psi_row.to_frame().T])



# Population share and bad-customer rate per score bin and term.
# Both pivots use the same layout: score bins as rows, terms as columns,
# user counts as values, plus 'All' margins for the totals.
_pivot_layout = dict(values='user_id',
                     index='score_bin',
                     columns=['term'],
                     aggfunc='count',
                     margins=True)

# Head-count of all users, and of users with label == 1 only.
df_total = pd.pivot_table(df_score, **_pivot_layout)
df_bad = pd.pivot_table(df_score[df_score['label']==1], **_pivot_layout)

# Share of each bin within its column: divide by the last ('All') row.
df_ratio = df_total.div(df_total.iloc[-1,:],axis=1)

# Bad rate per cell: bad head-count over total head-count.
df_bad_rate = df_bad.div(df_total)

# Plotting

# One diverging palette (hue 130 -> hue 20) is shared by both charts.
colormap = sns.diverging_palette(130, 20, as_cmap=True)

# Stacked bars: share of each score bin, one bar per column of df_ratio.
ratio_by_term = df_ratio.drop('All').T
ratio_by_term.plot(kind='bar', stacked=True, colormap=colormap)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

# Lines: bad rate of each score bin across the columns of df_bad_rate.
bad_rate_by_term = df_bad_rate.drop('All').T
bad_rate_by_term.plot(kind='line', colormap=colormap)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

plt.show()


上一篇 下一篇

猜你喜欢

热点阅读