Commit bd18c3b0 authored by 王家华

bug修正

parent 589bfcb3
This diff is collapsed.
...@@ -26,7 +26,7 @@ def topN_feature_importance(model, clf, title="untitled", save_path='./mvp/plots ...@@ -26,7 +26,7 @@ def topN_feature_importance(model, clf, title="untitled", save_path='./mvp/plots
model.plot_importance(clf, max_num_features=topN) model.plot_importance(clf, max_num_features=topN)
plt.title("Feature Importances") plt.title("Feature Importances")
path = save_path + title + " featureImportance.png" path = save_path + title + "_featureImportance.png"
plt.savefig(path) plt.savefig(path)
plt.show() plt.show()
return path return path
...@@ -67,13 +67,14 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled', ...@@ -67,13 +67,14 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
tab_df = [] tab_df = []
if tab_df_list is None: if tab_df_list is None:
for data in datalist: for data in datalist:
data = data.fillna(-1)
tab_df.append( tab_df.append(
pd.Series(data.index).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist()) pd.Series(data.index).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
tab_df.append( tab_df.append(
pd.Series(data.values).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist()) pd.Series(data.values).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
# validate tab_rows # validate tab_rows
if tab_rows is None: if tab_rows is None:
table_rows.append('index'); table_rows.append('index')
table_rows.append('values') table_rows.append('values')
else: else:
# tab_rows was given by # tab_rows was given by
...@@ -113,12 +114,11 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled', ...@@ -113,12 +114,11 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
if saved_path is not None: if saved_path is not None:
plt.savefig(saved_path + title + ".png") plt.savefig(saved_path + title + ".png")
plt.show() plt.show()
return 1 return saved_path + title + ".png"
def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None, def plot_table_df(dataset, auc=None, title='untitled', X_label=None, y_label=None,
tab_df=None, plot_tab=True, saved_path=None): tab_df=None, plot_tab=True, saved_path=None):
print(tab_df)
''' '''
instructions : visualization of pivot with single dataframe instructions : visualization of pivot with single dataframe
Params : Params :
...@@ -132,7 +132,24 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None, ...@@ -132,7 +132,24 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
''' '''
fig, axs = plt.subplots(1, 1, figsize=(13, 9), linewidth=0.1) fig, axs = plt.subplots(1, 1, figsize=(13, 9), linewidth=0.1)
table_rows = dataset.columns
if isinstance(dataset, pd.DataFrame):
table_rows = dataset.columns
else:
axs.plot(range(len(dataset.index)), dataset.values)
fig.subplots_adjust(bottom=0.2)
plt.grid()
if y_label is not None:
plt.ylabel(y_label)
if X_label is not None:
plt.xlabel(X_label)
# plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--')
plt.title(title)
if saved_path is not None:
plt.savefig(saved_path + title + ".png")
plt.show()
return saved_path + title + ".png"
table_cols = pd.Series(dataset.index).astype(str).map(lambda x: x.replace(' ', '')).map( table_cols = pd.Series(dataset.index).astype(str).map(lambda x: x.replace(' ', '')).map(
lambda x : x.replace('0.', '.')) lambda x : x.replace('0.', '.'))
...@@ -140,7 +157,10 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None, ...@@ -140,7 +157,10 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
for i in range(len(table_rows)): for i in range(len(table_rows)):
x = range(len(table_cols)) x = range(len(table_cols))
y = dataset.iloc[:, i] y = dataset.iloc[:, i]
axs.plot(x, y, label=str(table_rows[i]) + ' AUC: ' + str(auc[i])) if auc != None:
axs.plot(x, y, label=str(table_rows[i]) + ' AUC: ' + str(auc[i]))
else:
axs.plot(x, y, label=str(table_rows[i]))
# if table should be plot # if table should be plot
if plot_tab: if plot_tab:
if tab_df is None: if tab_df is None:
...@@ -157,8 +177,8 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None, ...@@ -157,8 +177,8 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
loc='bottom') loc='bottom')
plt.xticks([]) plt.xticks([])
# otherwise, nothing to do here # otherwise, nothing to do here
the_table.auto_set_font_size(False) the_table.auto_set_font_size(False)
the_table.set_fontsize(9) the_table.set_fontsize(8)
fig.subplots_adjust(bottom=0.2) fig.subplots_adjust(bottom=0.2)
plt.grid() plt.grid()
if y_label is not None: if y_label is not None:
...@@ -171,7 +191,7 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None, ...@@ -171,7 +191,7 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
if saved_path is not None: if saved_path is not None:
plt.savefig(saved_path + title + ".png") plt.savefig(saved_path + title + ".png")
plt.show() plt.show()
return 1 return saved_path + title + ".png"
......
...@@ -22,8 +22,6 @@ score = 'score' ...@@ -22,8 +22,6 @@ score = 'score'
prediction = 'predict' prediction = 'predict'
############################ ############################
# 备份df_sample # 备份df_sample
#df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx") #df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx")
...@@ -61,15 +59,24 @@ predictions ,test_auc = lightgbm.predict(lgbm,df_test,dhb.features,target) ...@@ -61,15 +59,24 @@ predictions ,test_auc = lightgbm.predict(lgbm,df_test,dhb.features,target)
# 把新的预测结果加入test # 把新的预测结果加入test
df_test[prediction] = predictions df_test[prediction] = predictions
####### allocator cache ############ ####### allocator cache ############
applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'} applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'}
applied_type = {'1,2':'首贷','1,2,3':'全量客群','1':'首申','2':'复申','3':'复贷'} applied_type = {'1,2':'首贷','1,2,3':'全量客群','1':'首申','2':'复申','3':'复贷'}
#################################### ####################################
### report ### report
import os
os.chdir("E:/bla/model_mvp/")
# plot feature importance # plot feature importance
topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/', topN=20) topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/cache/', topN=20)
importanct_feat = pd.DataFrame({
'column': dhb.features,
'importance': lgbm.feature_importance(),
}).sort_values(by='importance',ascending=False).column.tolist()[:3]
# report file # report file
report_path = "E:/bla/model_mvp/" report_path = "E:/bla/model_mvp/"
...@@ -91,21 +98,23 @@ document.add_picture(topnfeat_path) ...@@ -91,21 +98,23 @@ document.add_picture(topnfeat_path)
document.add_paragraph('univar_chart') document.add_paragraph('univar_chart')
# 遍历目标features画出univarchart # 遍历目标features画出univarchart
for i in dhb.features[:3]: for i in importanct_feat:
univar_train = datacal.cal_univar(df_train, i, target, qcut=10) univar_train = datacal.cal_univar(df_train, i, target, qcut=10)
univar_val = datacal.cal_univar(df_val, i, target, qcut=10) univar_val = datacal.cal_univar(df_val, i, target, qcut=10)
univar_test = datacal.cal_univar(df_test, i, target, qcut=10) univar_test = datacal.cal_univar(df_test, i, target, qcut=10)
tab_df_list = [univar_train,univar_val,univar_test]
univarChart = matplot.plot_table_list([univar_train,univar_val,univar_test], [1,2,3], datalist_description=None, title= i +' univar Chart', X_label=None, y_label=None, univarChart = matplot.plot_table_list([univar_train,univar_val,univar_test], [1,2,3], datalist_description=None, title= i +' univar Chart', X_label=None, y_label=None,
tab_df_list=None, plot_tab=True, tab_df_list=tab_df_list, plot_tab=False,
saved_path='./mvp/plots/cache/') saved_path='./mvp/plots/cache/')
document.add_picture('./mvp/plots/cache/' + i +' univar Chart' + ".png") document.add_picture(univarChart)
document.add_paragraph('PDP_chart') document.add_paragraph('PDP_chart')
# 遍历目标features 画出对应PDP # 遍历目标features 画出对应PDP
for i in dhb.features[:3]: for i in importanct_feat:
pdp = datacal.cal_pdp(df=df_test, score=prediction, feature=i, qcut=10) pdp = datacal.cal_pdp(df=df_test, score=prediction, feature=i, qcut=10)
pdpChart = matplot.plot_table(pdp,title= i +' PDP Chart',saved_path='./mvp/plots/cache/') pdpChart = matplot.plot_table_df(pdp, ['1'], title=i + ' PDP Chart', X_label=None, y_label=None,
document.add_picture('./mvp/plots/cache/' + i +' PDP Chart' + ".png") tab_df=None, plot_tab=True, saved_path='./mvp/plots/cache/')
document.add_picture(pdpChart)
...@@ -113,13 +122,20 @@ filetool.saveDocument(document, report_path, report_name) ...@@ -113,13 +122,20 @@ filetool.saveDocument(document, report_path, report_name)
document.add_paragraph('lift_chart') document.add_paragraph('lift_chart')
# 遍历给定渠道 & 客群 默认等频画出liftchart # 遍历给定渠道 & 客群 默认等频画出liftchart
for channel in applied_from: try:
for type in applied_type: for channel in ['333','159537','1,214,217,198']:
df_sliced = df_test[df_test.applied_type.map(lambda x : True if str(x) in type.split(',') else False) & df_test.applied_from.map(lambda x : True if str(x) in channel.split(',') else False)] for type in ['1','2','3']:
lift = datacal.cal_liftchart(df_sliced,score=prediction) df_sliced = df_test[df_test.applied_type.map(lambda x : True if str(x) in type.split(',') else False) & df_test.applied_from.map(lambda x : True if str(x) in channel.split(',') else False)]
liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache') lift_pred = datacal.cal_lift(df_sliced,score=prediction)
document.add_picture("./mvp/plots/cache" + i + ' lift Chart.png') lift_online = datacal.cal_lift(df_sliced,score=score)
#liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache/')
liftChart = matplot.plot_table_list([lift_pred, lift_online], [roc_auc_score(df_test[target],df_test[prediction]),roc_auc_score(df_test[target],df_test[score])], datalist_description=None,
title= applied_from[channel]+applied_type[type]+ ' lift Chart', X_label=None, y_label=None,
tab_df_list=tab_df_list, plot_tab=False,
saved_path='./mvp/plots/cache/')
document.add_picture(liftChart)
except:
pass
filetool.saveDocument(document, report_path, report_name) filetool.saveDocument(document, report_path, report_name)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment