Commit bd18c3b0 authored by 王家华's avatar 王家华

bug修正

parent 589bfcb3
This diff is collapsed.
......@@ -26,7 +26,7 @@ def topN_feature_importance(model, clf, title="untitled", save_path='./mvp/plots
model.plot_importance(clf, max_num_features=topN)
plt.title("Feature Importances")
path = save_path + title + " featureImportance.png"
path = save_path + title + "_featureImportance.png"
plt.savefig(path)
plt.show()
return path
......@@ -67,13 +67,14 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
tab_df = []
if tab_df_list is None:
for data in datalist:
data = data.fillna(-1)
tab_df.append(
pd.Series(data.index).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
tab_df.append(
pd.Series(data.values).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
# validate tab_rows
if tab_rows is None:
table_rows.append('index');
table_rows.append('index')
table_rows.append('values')
else:
# tab_rows was given by
......@@ -113,12 +114,11 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
if saved_path is not None:
plt.savefig(saved_path + title + ".png")
plt.show()
return 1
return saved_path + title + ".png"
def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
def plot_table_df(dataset, auc=None, title='untitled', X_label=None, y_label=None,
tab_df=None, plot_tab=True, saved_path=None):
print(tab_df)
'''
instructions : visualization of pivot with single dataframe
Params :
......@@ -132,7 +132,24 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
'''
fig, axs = plt.subplots(1, 1, figsize=(13, 9), linewidth=0.1)
if isinstance(dataset, pd.DataFrame):
table_rows = dataset.columns
else:
axs.plot(range(len(dataset.index)), dataset.values)
fig.subplots_adjust(bottom=0.2)
plt.grid()
if y_label is not None:
plt.ylabel(y_label)
if X_label is not None:
plt.xlabel(X_label)
# plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--')
plt.title(title)
if saved_path is not None:
plt.savefig(saved_path + title + ".png")
plt.show()
return saved_path + title + ".png"
table_cols = pd.Series(dataset.index).astype(str).map(lambda x: x.replace(' ', '')).map(
lambda x : x.replace('0.', '.'))
......@@ -140,7 +157,10 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
for i in range(len(table_rows)):
x = range(len(table_cols))
y = dataset.iloc[:, i]
if auc != None:
axs.plot(x, y, label=str(table_rows[i]) + ' AUC: ' + str(auc[i]))
else:
axs.plot(x, y, label=str(table_rows[i]))
# if table should be plot
if plot_tab:
if tab_df is None:
......@@ -158,7 +178,7 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
plt.xticks([])
# otherwise, nothing to do here
the_table.auto_set_font_size(False)
the_table.set_fontsize(9)
the_table.set_fontsize(8)
fig.subplots_adjust(bottom=0.2)
plt.grid()
if y_label is not None:
......@@ -171,7 +191,7 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
if saved_path is not None:
plt.savefig(saved_path + title + ".png")
plt.show()
return 1
return saved_path + title + ".png"
......
......@@ -22,8 +22,6 @@ score = 'score'
prediction = 'predict'
############################
# Back up df_sample
#df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx")
......@@ -61,15 +59,24 @@ predictions ,test_auc = lightgbm.predict(lgbm,df_test,dhb.features,target)
# Add the new predictions to the test set
df_test[prediction] = predictions
####### allocator cache ############
applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'}
applied_type = {'1,2':'首贷','1,2,3':'全量客群','1':'首申','2':'复申','3':'复贷'}
####################################
### report
import os
os.chdir("E:/bla/model_mvp/")
# plot feature importance
topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/', topN=20)
topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/cache/', topN=20)
importanct_feat = pd.DataFrame({
'column': dhb.features,
'importance': lgbm.feature_importance(),
}).sort_values(by='importance',ascending=False).column.tolist()[:3]
# report file
report_path = "E:/bla/model_mvp/"
......@@ -91,21 +98,23 @@ document.add_picture(topnfeat_path)
document.add_paragraph('univar_chart')
# Iterate over the target features and plot a univariate chart for each
for i in dhb.features[:3]:
for i in importanct_feat:
univar_train = datacal.cal_univar(df_train, i, target, qcut=10)
univar_val = datacal.cal_univar(df_val, i, target, qcut=10)
univar_test = datacal.cal_univar(df_test, i, target, qcut=10)
tab_df_list = [univar_train,univar_val,univar_test]
univarChart = matplot.plot_table_list([univar_train,univar_val,univar_test], [1,2,3], datalist_description=None, title= i +' univar Chart', X_label=None, y_label=None,
tab_df_list=None, plot_tab=True,
tab_df_list=tab_df_list, plot_tab=False,
saved_path='./mvp/plots/cache/')
document.add_picture('./mvp/plots/cache/' + i +' univar Chart' + ".png")
document.add_picture(univarChart)
document.add_paragraph('PDP_chart')
# Iterate over the target features and plot the corresponding PDP for each
for i in dhb.features[:3]:
for i in importanct_feat:
pdp = datacal.cal_pdp(df=df_test, score=prediction, feature=i, qcut=10)
pdpChart = matplot.plot_table(pdp,title= i +' PDP Chart',saved_path='./mvp/plots/cache/')
document.add_picture('./mvp/plots/cache/' + i +' PDP Chart' + ".png")
pdpChart = matplot.plot_table_df(pdp, ['1'], title=i + ' PDP Chart', X_label=None, y_label=None,
tab_df=None, plot_tab=True, saved_path='./mvp/plots/cache/')
document.add_picture(pdpChart)
......@@ -113,13 +122,20 @@ filetool.saveDocument(document, report_path, report_name)
document.add_paragraph('lift_chart')
# Iterate over the given channels and customer segments; plot lift charts (equal-frequency binning by default)
for channel in applied_from:
for type in applied_type:
try:
for channel in ['333','159537','1,214,217,198']:
for type in ['1','2','3']:
df_sliced = df_test[df_test.applied_type.map(lambda x : True if str(x) in type.split(',') else False) & df_test.applied_from.map(lambda x : True if str(x) in channel.split(',') else False)]
lift = datacal.cal_liftchart(df_sliced,score=prediction)
liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache')
document.add_picture("./mvp/plots/cache" + i + ' lift Chart.png')
lift_pred = datacal.cal_lift(df_sliced,score=prediction)
lift_online = datacal.cal_lift(df_sliced,score=score)
#liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache/')
liftChart = matplot.plot_table_list([lift_pred, lift_online], [roc_auc_score(df_test[target],df_test[prediction]),roc_auc_score(df_test[target],df_test[score])], datalist_description=None,
title= applied_from[channel]+applied_type[type]+ ' lift Chart', X_label=None, y_label=None,
tab_df_list=tab_df_list, plot_tab=False,
saved_path='./mvp/plots/cache/')
document.add_picture(liftChart)
except:
pass
filetool.saveDocument(document, report_path, report_name)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment