XGBoost 查看特征分数
2017-05-09 本文已影响0人
Yuu_CX
# Export and plot the feature scores of an already-trained XGBoost booster.
#
# Expects `bst` (the trained booster), `pd` (pandas) and `plt`
# (matplotlib.pyplot) to be defined earlier in the file.

# Prediction step was disabled in the original snippet:
# y = bst.predict(sub_trainning_data)

# get_fscore() yields a feature -> score mapping; turn it into a list of
# (feature, score) pairs ordered from the highest score to the lowest.
feature_score = bst.get_fscore()
feature_score = sorted(feature_score.items(), key=lambda x: x[1], reverse=True)

# One "feature,score" CSV row per feature, in the sorted order.
fs = ["{0},{1}\n".format(key, value) for (key, value) in feature_score]

# NOTE(review): the feature scores are written to a file named
# "submission.csv" -- confirm this path is intended and does not clobber a
# real submission file.
with open('../sub/submission.csv', 'w') as f:
    # The header is a single string, so use write(); writelines() would
    # iterate over it one character at a time.
    f.write("feature,fscore\n")
    f.writelines(fs)

# Normalise the raw scores so they sum to 1, then draw a horizontal bar
# chart of the relative importance of each feature.
df = pd.DataFrame(feature_score, columns=['feature', 'fscore'])
df['fscore'] = df['fscore'] / df['fscore'].sum()
featp = df.plot(kind='barh', x='feature', y='fscore', legend=False, figsize=(6, 10))
plt.title('XGBoost Feature Importance')
plt.xlabel('relative importance')