Commit 085d706c authored by 王家华

仍然没有加表格 好气呀

parent 9b10189a
......@@ -4,10 +4,16 @@
<list default="true" id="c45d2e80-934e-41cc-8f01-c6d0d282db9d" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/graph/matplot.py" beforeDir="false" afterPath="$PROJECT_DIR$/graph/matplot.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/models_kit/general_methods.py" beforeDir="false" afterPath="$PROJECT_DIR$/models_kit/general_methods.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/models_obj/dhb_obj.py" beforeDir="false" afterPath="$PROJECT_DIR$/models_obj/dhb_obj.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/allocator.py" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/allocator.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/plots/cache/Univariate Chart of dhb_overview_dun_call_total_duration.png" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/plots/cache/Univariate Chart of dhb_overview_dun_call_total_duration.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/plots/cache/Univariate Chart of dhb_overview_ntdun_call_avg_duration.png" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/plots/cache/Univariate Chart of dhb_overview_ntdun_call_avg_duration.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/plots/cache/Univariate Chart of dhb_overview_ntdun_call_duration_below15.png" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/plots/cache/Univariate Chart of dhb_overview_ntdun_call_duration_below15.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/plots/cache/pdp Chart with 9 1.png" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/plots/cache/pdp Chart with 9 1.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/plots/cache/内部复申 lift Chart.png" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/plots/cache/内部复申 lift Chart.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/plots/cache/内部首付贷 lift Chart.png" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/plots/cache/内部首付贷 lift Chart.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/plots/cache/内部首申 lift Chart.png" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/plots/cache/内部首申 lift Chart.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/plots/cache/内部首贷 lift Chart.png" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/plots/cache/内部首贷 lift Chart.png" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/refit.py" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/refit.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/tools/filetool.py" beforeDir="false" afterPath="$PROJECT_DIR$/tools/filetool.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
......@@ -20,8 +26,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/models_kit/general_methods.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="306">
<caret line="51" column="30" lean-forward="true" selection-start-line="51" selection-start-column="30" selection-end-line="51" selection-end-column="30" />
<state relative-caret-position="303">
<caret line="44" column="32" selection-start-line="44" selection-start-column="32" selection-end-line="44" selection-end-column="32" />
<folding>
<element signature="e#0#31#0" expanded="true" />
</folding>
......@@ -29,11 +35,11 @@
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/mvp/allocator.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="119">
<caret line="58" column="22" lean-forward="true" selection-start-line="58" selection-start-column="22" selection-end-line="58" selection-end-column="22" />
<state relative-caret-position="340">
<caret line="59" column="90" selection-start-line="59" selection-start-column="77" selection-end-line="59" selection-end-column="90" />
<folding>
<element signature="e#4120#4150#0" expanded="true" />
</folding>
......@@ -41,11 +47,11 @@
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/mvp/refit.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="204">
<caret line="171" lean-forward="true" selection-start-line="171" selection-end-line="171" />
<state relative-caret-position="187">
<caret line="86" column="13" selection-start-line="86" selection-start-column="13" selection-end-line="86" selection-end-column="13" />
<folding>
<element signature="e#0#30#0" expanded="true" />
</folding>
......@@ -57,7 +63,7 @@
<entry file="file://$PROJECT_DIR$/mvp/lgbreport.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="187">
<caret line="11" lean-forward="true" selection-start-line="11" selection-end-line="11" />
<caret line="11" selection-start-line="11" selection-end-line="11" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
......@@ -69,7 +75,7 @@
<entry file="file://$PROJECT_DIR$/tools/filetool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="473">
<caret line="46" lean-forward="true" selection-start-line="46" selection-end-line="46" />
<caret line="46" selection-start-line="46" selection-end-line="46" />
<folding>
<element signature="e#0#9#0" expanded="true" />
</folding>
......@@ -81,7 +87,7 @@
<entry file="file://$PROJECT_DIR$/tools/datacal.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="595">
<caret line="35" column="36" lean-forward="true" selection-start-line="35" selection-start-column="36" selection-end-line="35" selection-end-column="36" />
<caret line="35" column="36" selection-start-line="35" selection-start-column="36" selection-end-line="35" selection-end-column="36" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
......@@ -113,8 +119,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/graph/matplot.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="152">
<caret line="377" lean-forward="true" selection-start-line="377" selection-end-line="377" />
<state relative-caret-position="867">
<caret line="63" column="23" lean-forward="true" selection-start-line="63" selection-start-column="23" selection-end-line="63" selection-end-column="23" />
</state>
</provider>
</entry>
......@@ -129,12 +135,12 @@
<list>
<option value="$PROJECT_DIR$/models_kit/lightgbm.py" />
<option value="$PROJECT_DIR$/models_obj/dhb_obj.py" />
<option value="$PROJECT_DIR$/models_kit/general_methods.py" />
<option value="$PROJECT_DIR$/tools/filetool.py" />
<option value="$PROJECT_DIR$/tools/datacal.py" />
<option value="$PROJECT_DIR$/mvp/refit.py" />
<option value="$PROJECT_DIR$/graph/matplot.py" />
<option value="$PROJECT_DIR$/models_kit/general_methods.py" />
<option value="$PROJECT_DIR$/mvp/allocator.py" />
<option value="$PROJECT_DIR$/graph/matplot.py" />
<option value="$PROJECT_DIR$/mvp/refit.py" />
</list>
</option>
</component>
......@@ -149,8 +155,8 @@
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="261" />
<option name="y" value="251" />
<option name="x" value="233" />
<option name="y" value="99" />
<option name="width" value="1400" />
<option name="height" value="831" />
</component>
......@@ -321,7 +327,7 @@
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.3290461" />
<window_info active="true" anchor="bottom" id="Run" order="2" visible="true" weight="0.3290461" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.39978564" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
......@@ -329,7 +335,7 @@
<window_info anchor="bottom" id="Version Control" order="7" />
<window_info anchor="bottom" id="Terminal" order="8" weight="0.3290461" />
<window_info anchor="bottom" id="Event Log" order="9" side_tool="true" />
<window_info active="true" anchor="bottom" id="Python Console" order="10" visible="true" weight="0.46623793" />
<window_info anchor="bottom" id="Python Console" order="10" weight="0.1800643" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
......@@ -403,16 +409,6 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tools/filetool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="473">
<caret line="46" lean-forward="true" selection-start-line="46" selection-end-line="46" />
<folding>
<element signature="e#0#9#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
<state split_layout="SPLIT">
......@@ -421,66 +417,76 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tools/datacal.py">
<entry file="file://$PROJECT_DIR$/models_kit/lightgbm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="595">
<caret line="35" column="36" lean-forward="true" selection-start-line="35" selection-start-column="36" selection-end-line="35" selection-end-column="36" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
<state relative-caret-position="-34">
<caret line="1" selection-start-line="1" selection-end-line="1" selection-end-column="41" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/models_kit/general_methods.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="306">
<caret line="51" column="30" lean-forward="true" selection-start-line="51" selection-start-column="30" selection-end-line="51" selection-end-column="30" />
<state relative-caret-position="303">
<caret line="44" column="32" selection-start-line="44" selection-start-column="32" selection-end-line="44" selection-end-column="32" />
<folding>
<element signature="e#0#31#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/models_kit/lightgbm.py">
<entry file="file://$PROJECT_DIR$/mvp/lgbreport.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-34">
<caret line="1" selection-start-line="1" selection-end-line="1" selection-end-column="41" />
<state relative-caret-position="187">
<caret line="11" selection-start-line="11" selection-end-line="11" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/graph/matplot.py">
<entry file="file://$PROJECT_DIR$/tools/datacal.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="152">
<caret line="377" lean-forward="true" selection-start-line="377" selection-end-line="377" />
<state relative-caret-position="595">
<caret line="35" column="36" selection-start-line="35" selection-start-column="36" selection-end-line="35" selection-end-column="36" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/mvp/refit.py">
<entry file="file://$PROJECT_DIR$/tools/filetool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="204">
<caret line="171" lean-forward="true" selection-start-line="171" selection-end-line="171" />
<state relative-caret-position="473">
<caret line="46" selection-start-line="46" selection-end-line="46" />
<folding>
<element signature="e#0#30#0" expanded="true" />
<element signature="e#0#9#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/mvp/lgbreport.py">
<entry file="file://$PROJECT_DIR$/mvp/allocator.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="187">
<caret line="11" lean-forward="true" selection-start-line="11" selection-end-line="11" />
<state relative-caret-position="340">
<caret line="59" column="90" selection-start-line="59" selection-start-column="77" selection-end-line="59" selection-end-column="90" />
<folding>
<element signature="e#0#19#0" expanded="true" />
<element signature="e#4120#4150#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/mvp/allocator.py">
<entry file="file://$PROJECT_DIR$/graph/matplot.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="119">
<caret line="58" column="22" lean-forward="true" selection-start-line="58" selection-start-column="22" selection-end-line="58" selection-end-column="22" />
<state relative-caret-position="867">
<caret line="63" column="23" lean-forward="true" selection-start-line="63" selection-start-column="23" selection-end-line="63" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/mvp/refit.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="187">
<caret line="86" column="13" selection-start-line="86" selection-start-column="13" selection-end-line="86" selection-end-column="13" />
<folding>
<element signature="e#4120#4150#0" expanded="true" />
<element signature="e#0#30#0" expanded="true" />
</folding>
</state>
</provider>
......
......@@ -57,7 +57,7 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
# 每个table需要只有一个index,一个values
x = range(len(datalist[table_index].index))
y = datalist[table_index].values
axs.plot(x, y, label=datalist_description[table_index])
axs.plot(x, y, label=datalist_description[table_index] + "AUC: "+ str(round(auc[table_index],3)))
if len(x) == 1:
plot_tab = False
if plot_tab:
......@@ -100,7 +100,7 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
plt.xticks([])
# otherwise, nothing to do here
the_table.auto_set_font_size(False)
the_table.set_fontsize(8)
the_table.set_fontsize(6)
fig.subplots_adjust(bottom=0.2)
plt.grid()
if y_label is not None:
......
File added
......@@ -39,24 +39,26 @@ def topN_feature_importance_list(features, clf, topN=3):
return importanct_feat
def model_selection(algorthm,clf,df_train,df_val,df_test,target,score,optimal_model,model_obj):
    """Compare candidate parameter sets with the online model and pick the best.

    Builds a "model matrix" (one column per model) with the rows ``name``,
    ``Params``, ``trainAUC`` and ``validationAUC``.  The first column,
    labelled ``'线上模型'``, is the baseline obtained from the existing
    ``score`` column; one more column is appended for every candidate that
    is trained through ``lightgbm.train_lgbm``.

    :param algorthm: algorithm name; only ``"lightGBM"`` triggers training.
    :param clf: not used by this function.
    :param df_train: training frame containing the ``target`` and ``score`` columns.
    :param df_val: validation frame containing the ``target`` and ``score`` columns.
    :param df_test: not used by this function.
    :param target: name of the label column.
    :param score: name of the column holding the online model's score.
    :param optimal_model: not used by this function.
    :param model_obj: object whose ``features`` attribute is passed to the trainer.
    :return: the ``Params`` entry of the column with the highest ``validationAUC``
        (this is the ``'NULL'`` placeholder when the online model ranks first).
    """
    model_matrix_index = ['name', 'Params', 'trainAUC', 'validationAUC']

    # Baseline column for the online model. It has no name/params of its own,
    # so those two cells hold the 'NULL' placeholder. The validation AUC is
    # computed on df_val; computing it on df_train merely duplicated trainAUC.
    model_matrix = pd.DataFrame(
        ['NULL', 'NULL',
         roc_auc_score(df_train[target], df_train[score]),
         roc_auc_score(df_val[target], df_val[score])],
        index=model_matrix_index,
        columns=['线上模型'])

    # NOTE(review): `optimal_para` is not bound inside this function; it must
    # come from module scope. Presumably the otherwise unused `optimal_model`
    # argument was meant here -- confirm against the caller before changing it.
    for pointer, param in enumerate(optimal_para):
        if algorthm == "lightGBM":
            # The fitted booster returned by train_lgbm is not needed here.
            train_auc, val_auc, _ = lightgbm.train_lgbm(
                lightgbm.params_lgb, df_train, df_val, model_obj.features,
                adds_on=param, target=target)
            candidate = pd.DataFrame(['lightGBM', param, train_auc, val_auc],
                                     index=model_matrix_index,
                                     columns=[pointer])
            model_matrix = pd.concat([model_matrix, candidate], axis=1)

    # Simple selection rule: take the params with the highest validation AUC.
    ranked = model_matrix.T.sort_values(by='validationAUC', ascending=False)
    best_params = ranked.iloc[0, :].loc['Params']
    return best_params
\ No newline at end of file
# def model_selection(algorthm,clf,df_train,df_val,df_test,target,score,optimal_model,model_obj):
# # model matrix 存储不同模型指标的矩阵
# model_matrix_index = ['name', 'Params', 'trainAUC', 'validationAUC']
# model_matrix = pd.DataFrame(['NULL', 'NULL', roc_auc_score(df_train[target], df_train[score]),
# roc_auc_score(df_train[target], df_train[score])], index=model_matrix_index,
# columns=['线上模型'])
#
# # 定义最优参指针
# pointer = 0
# # 遍历最优参组合
# for param in optimal_para:
# if algorthm == "lightGBM":
# train_auc, val_auc, lgbm = lightgbm.train_lgbm(lightgbm.params_lgb, df_train, df_val, model_obj.features,
# adds_on=param, target=target)
# model_matrix = pd.concat([model_matrix,
# pd.DataFrame(['lightGBM', param, train_auc, val_auc], index=model_matrix_index,
# columns=[pointer])], axis=1)
# pointer += 1
#
# # 简单选取一下validation set auc 最高的 params
# best_params = model_matrix.T.sort_values(by='validationAUC', ascending=False).iloc[0, :].loc['Params']
\ No newline at end of file
......@@ -62,11 +62,11 @@ df_train, df_val, df_test = datacal.train_test_split_general(df_sample, val_size
del df_sample
# 模型refit
model_matrix, lgbm = refit.model_fit(df_sample, dhb, target, score)
model_matrix, lgbm = refit.model_fit(df_train, df_val, df_test, dhb, target, score)
print(model_matrix)
# 生成报告
status = refit.model_report(lgbm, df_train, df_val, df_test, dhb, target,
status = refit.model_report(lgbm, df_train, df_val, df_test, dhb, target, model_matrix,
score, prediction, report_path, report_name, applied_from, applied_type, topN=3)
......
......@@ -83,6 +83,9 @@ def model_report(clf, df_train, df_val, df_test, model_obj, target,model_matrix,
:return:
status : 返回1表示执行完成
'''
# 样本分布
df_train_ = len(df_train)
# 用新模型预测结果 xgb还需要加一个proba (TODO here)
predictions ,test_auc = lightgbm.predict(clf,df_test,model_obj.features,target)
......@@ -126,7 +129,7 @@ def model_report(clf, df_train, df_val, df_test, model_obj, target,model_matrix,
#univarChart = matplot.plot_table_list([univar_train,univar_val,univar_test], [1,2,3], datalist_description=None, title= i +' univar Chart', X_label=None, y_label=None,
# tab_df_list=tab_df_list, plot_tab=False,
# saved_path='./mvp/plots/cache/')
document.add_picture(univar_chart,width=Inches(8))
document.add_picture(univar_chart,width=Inches(7))
# 新增pdp段
document.add_paragraph('PDP_chart')
......@@ -136,45 +139,65 @@ def model_report(clf, df_train, df_val, df_test, model_obj, target,model_matrix,
# pdpChart = matplot.plot_table_df(pdp, ['1'], title=i + ' PDP Chart', X_label=None, y_label=None,
# tab_df=None, plot_tab=True, saved_path='./mvp/plots/cache/')
pdpChart = matplot.pdpCharts9(clf, df_test, importanct_feat, model_obj.features, n_bins=10, dfltValue=-99999, maxValRatio=1, saved_path="./mvp/plots/cache/")
document.add_picture(pdpChart,width=Inches(8))
document.add_picture(pdpChart,width=Inches(7))
# 新增liftchart段
document.add_paragraph('lift_chart')
# 遍历给定渠道 & 客群 默认等频画出liftchart
try:
lift_pred = datacal.cal_lift(df_test, score=prediction)
lift_online = datacal.cal_lift(df_test, score=score)
# liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache/')
liftChart = matplot.plot_table_list([lift_pred['mean'], lift_online['mean']],
[roc_auc_score(df_test[target], df_test[prediction]),
roc_auc_score(df_test[target], df_test[score])], datalist_description=None,
roc_auc_score(df_test[target], df_test[score])], datalist_description=['新模型预测','线上模型'],
title='全渠道全量客群测试集上的 lift Chart',
X_label=None, y_label=None,
tab_df_list=None, plot_tab=False,
X_label=None, y_label='逾期率',
tab_df_list=[lift_pred['count'], lift_online['count']], plot_tab=False,
saved_path='./mvp/plots/cache/')
document.add_picture(liftChart, width=Inches(8))
# 遍历渠道
document.add_picture(liftChart, width=Inches(7))
# 遍历渠道
for channel in applied_from.keys():
# 遍历客群类型
for type in applied_type.keys():
print('lift ',type,channel)
print('lift ',channel)
# 数据切片
df_sliced = df_test[
df_test.applied_type.map(lambda x: True if str(x) in type.split(',') else False) & df_test.applied_from.map(
lambda x: True if str(x) in channel.split(',') else False)]
#
lift_pred = datacal.cal_lift(df_sliced, score=prediction)
lift_online = datacal.cal_lift(df_sliced, score=score)
# liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache/')
liftChart = matplot.plot_table_list([lift_pred['mean'], lift_online['mean']],
[roc_auc_score(df_test[target], df_test[prediction]),
roc_auc_score(df_test[target], df_test[score])],
datalist_description=['新模型预测', '线上模型'],
title=applied_from[channel] + ' lift Chart',
X_label=None, y_label='逾期率',
tab_df_list=[lift_pred['count'], lift_online['count']], plot_tab=False,
saved_path='./mvp/plots/cache/')
document.add_picture(liftChart, width=Inches(5.5))
# 遍历客群类型
for type in applied_type.keys():
print('lift ',type)
# 数据切片
df_sliced = df_test[df_test.applied_type.map(lambda x : True if str(x) in type.split(',') else False) & df_test.applied_from.map(lambda x : True if str(x) in channel.split(',') else False)]
#
lift_pred = datacal.cal_lift(df_sliced,score=prediction)
lift_online = datacal.cal_lift(df_sliced,score=score)
#liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache/')
liftChart = matplot.plot_table_list([lift_pred['mean'], lift_online['mean']], [roc_auc_score(df_test[target],df_test[prediction]),roc_auc_score(df_test[target],df_test[score])], datalist_description=None,
title= applied_from[channel]+applied_type[type]+ ' lift Chart', X_label=None, y_label=None,
tab_df_list=[lift_pred['count'], lift_online['count']], plot_tab=True,
liftChart = matplot.plot_table_list([lift_pred['mean'], lift_online['mean']], [roc_auc_score(df_test[target],df_test[prediction]),roc_auc_score(df_test[target],df_test[score])], datalist_description=['新模型预测','线上模型'],
title= applied_type[type]+ ' lift Chart', X_label=None, y_label='逾期率',
tab_df_list=[lift_pred['count'], lift_online['count']], plot_tab=False,
saved_path='./mvp/plots/cache/')
document.add_picture(liftChart,width=Inches(8))
# 存在某些渠道量很少的情况,加入try catch异常处理
document.add_picture(liftChart,width=Inches(5.5))
except Exception as e:
print('Exception: ',e)
print(e)
pass
# 存在某些渠道量很少的情况,加入try catch异常处理
# docx 保存
filetool.saveDocument(document, report_path, report_name)
return 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment