Commit bd18c3b0 authored by 王家华

bug修正

parent 589bfcb3
......@@ -2,11 +2,9 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="c45d2e80-934e-41cc-8f01-c6d0d282db9d" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/graph/matplot.py" beforeDir="false" afterPath="$PROJECT_DIR$/graph/matplot.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/models_kit/general_methods.py" beforeDir="false" afterPath="$PROJECT_DIR$/models_kit/general_methods.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/models_kit/lightgbm.py" beforeDir="false" afterPath="$PROJECT_DIR$/models_kit/lightgbm.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mvp/refit.py" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/refit.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/tools/datacal.py" beforeDir="false" afterPath="$PROJECT_DIR$/tools/datacal.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
......@@ -16,15 +14,6 @@
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/models_kit/general_methods.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="289">
<caret line="17" selection-start-line="17" selection-end-line="17" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/mvp/allocator.py">
<provider selected="true" editor-type-id="text-editor">
......@@ -39,8 +28,8 @@
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/mvp/refit.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-986">
<caret line="26" column="13" lean-forward="true" selection-start-line="26" selection-start-column="13" selection-end-line="26" selection-end-column="13" />
<state relative-caret-position="222">
<caret line="70" column="19" selection-start-line="70" selection-start-column="14" selection-end-line="70" selection-end-column="19" />
<folding>
<element signature="e#0#30#0" expanded="true" />
</folding>
......@@ -48,6 +37,15 @@
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://C:/ProgramData/Anaconda3/Lib/site-packages/matplotlib/table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-291">
<caret line="639" column="21" lean-forward="true" selection-start-line="639" selection-start-column="21" selection-end-line="639" selection-end-column="21" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tools/filetool.py">
<provider selected="true" editor-type-id="text-editor">
......@@ -62,8 +60,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tools/datacal.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="289">
<caret line="68" column="46" lean-forward="true" selection-start-line="68" selection-start-column="46" selection-end-line="68" selection-end-column="46" />
<state relative-caret-position="1139">
<caret line="67" column="36" lean-forward="true" selection-start-line="67" selection-start-column="36" selection-end-line="67" selection-end-column="36" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
......@@ -98,8 +96,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/graph/matplot.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="452">
<caret line="101" column="43" lean-forward="true" selection-start-line="101" selection-start-column="43" selection-end-line="101" selection-end-column="43" />
<state relative-caret-position="442">
<caret line="29" column="21" lean-forward="true" selection-start-line="29" selection-start-column="21" selection-end-line="29" selection-end-column="21" />
</state>
</provider>
</entry>
......@@ -107,8 +105,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/models_kit/lightgbm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="408">
<caret line="165" selection-start-line="165" selection-end-line="169" />
<state relative-caret-position="214">
<caret line="167" column="10" lean-forward="true" selection-start-line="167" selection-start-column="10" selection-end-line="167" selection-end-column="52" />
<folding>
<element signature="e#0#22#0" expanded="true" />
</folding>
......@@ -119,7 +117,7 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/models_kit/xgboost.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<state relative-caret-position="-1173">
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
......@@ -137,9 +135,9 @@
<list>
<option value="$PROJECT_DIR$/models_kit/general_methods.py" />
<option value="$PROJECT_DIR$/models_kit/lightgbm.py" />
<option value="$PROJECT_DIR$/tools/datacal.py" />
<option value="$PROJECT_DIR$/mvp/refit.py" />
<option value="$PROJECT_DIR$/graph/matplot.py" />
<option value="$PROJECT_DIR$/tools/datacal.py" />
</list>
</option>
</component>
......@@ -231,6 +229,7 @@
</component>
<component name="PropertiesComponent">
<property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="restartRequiresConfirmation" value="false" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
......@@ -246,7 +245,7 @@
</list>
</option>
</component>
<component name="RunManager" selected="Python.lightgbm">
<component name="RunManager" selected="Python.refit">
<configuration name="lightgbm" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="model_mvp" />
<option name="INTERPRETER_OPTIONS" value="" />
......@@ -291,8 +290,8 @@
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.lightgbm" />
<item itemvalue="Python.refit" />
<item itemvalue="Python.lightgbm" />
</list>
</recent_temporary>
</component>
......@@ -311,13 +310,14 @@
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="1936" height="1066" extended-state="6" />
<editor active="true" />
<layout>
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.17492098" />
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.28556374" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.3290461" />
<window_info active="true" anchor="bottom" id="Run" order="2" visible="true" weight="0.3290461" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.39978564" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
......@@ -325,7 +325,7 @@
<window_info anchor="bottom" id="Version Control" order="7" />
<window_info anchor="bottom" id="Terminal" order="8" weight="0.3290461" />
<window_info anchor="bottom" id="Event Log" order="9" side_tool="true" />
<window_info active="true" anchor="bottom" id="Python Console" order="10" visible="true" weight="0.31511253" />
<window_info anchor="bottom" id="Python Console" order="10" weight="0.31511253" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
......@@ -362,75 +362,89 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/models_kit/xgboost.py">
<entry file="file://$PROJECT_DIR$/datasource/mongodb.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<state relative-caret-position="493">
<caret line="29" selection-start-line="29" selection-end-line="29" />
<folding>
<element signature="e#0#19#0" expanded="true" />
<element signature="e#0#14#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/models_kit/lightgbm.py">
<entry file="file://$PROJECT_DIR$/tools/filetool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="408">
<caret line="165" selection-start-line="165" selection-end-line="169" />
<state relative-caret-position="-102">
<folding>
<element signature="e#0#22#0" expanded="true" />
<element signature="e#0#9#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/datasource/mongodb.py">
<entry file="file://C:/ProgramData/Anaconda3/Lib/site-packages/pandas/core/generic.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="493">
<caret line="29" selection-start-line="29" selection-end-line="29" />
<folding>
<element signature="e#0#14#0" expanded="true" />
</folding>
<state relative-caret-position="373">
<caret line="4377" lean-forward="true" selection-start-line="4377" selection-end-line="4377" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/models_obj/dhb_obj.py">
<entry file="file://$PROJECT_DIR$/models_kit/xgboost.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-952">
<caret line="19" column="137" selection-start-line="19" selection-start-column="125" selection-end-line="19" selection-end-column="137" />
<state relative-caret-position="-1173">
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tools/filetool.py">
<entry file="file://$PROJECT_DIR$/tools/datacal.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-102">
<state relative-caret-position="1139">
<caret line="67" column="36" lean-forward="true" selection-start-line="67" selection-start-column="36" selection-end-line="67" selection-end-column="36" />
<folding>
<element signature="e#0#9#0" expanded="true" />
<element signature="e#0#19#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/graph/matplot.py">
<entry file="file://C:/ProgramData/Anaconda3/Lib/site-packages/matplotlib/table.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="452">
<caret line="101" column="43" lean-forward="true" selection-start-line="101" selection-start-column="43" selection-end-line="101" selection-end-column="43" />
<state relative-caret-position="-291">
<caret line="639" column="21" lean-forward="true" selection-start-line="639" selection-start-column="21" selection-end-line="639" selection-end-column="21" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tools/datacal.py">
<entry file="file://$PROJECT_DIR$/models_kit/lightgbm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="289">
<caret line="68" column="46" lean-forward="true" selection-start-line="68" selection-start-column="46" selection-end-line="68" selection-end-column="46" />
<state relative-caret-position="214">
<caret line="167" column="10" lean-forward="true" selection-start-line="167" selection-start-column="10" selection-end-line="167" selection-end-column="52" />
<folding>
<element signature="e#0#22#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/models_obj/dhb_obj.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-952">
<caret line="19" column="137" selection-start-line="19" selection-start-column="125" selection-end-line="19" selection-end-column="137" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/graph/matplot.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="442">
<caret line="29" column="21" lean-forward="true" selection-start-line="29" selection-start-column="21" selection-end-line="29" selection-end-column="21" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/mvp/refit.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-986">
<caret line="26" column="13" lean-forward="true" selection-start-line="26" selection-start-column="13" selection-end-line="26" selection-end-column="13" />
<state relative-caret-position="222">
<caret line="70" column="19" selection-start-line="70" selection-start-column="14" selection-end-line="70" selection-end-column="19" />
<folding>
<element signature="e#0#30#0" expanded="true" />
</folding>
......
......@@ -26,7 +26,7 @@ def topN_feature_importance(model, clf, title="untitled", save_path='./mvp/plots
model.plot_importance(clf, max_num_features=topN)
plt.title("Feature Importances")
path = save_path + title + " featureImportance.png"
path = save_path + title + "_featureImportance.png"
plt.savefig(path)
plt.show()
return path
......@@ -67,13 +67,14 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
tab_df = []
if tab_df_list is None:
for data in datalist:
data = data.fillna(-1)
tab_df.append(
pd.Series(data.index).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
tab_df.append(
pd.Series(data.values).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
# validate tab_rows
if tab_rows is None:
table_rows.append('index');
table_rows.append('index')
table_rows.append('values')
else:
# tab_rows was given by
......@@ -113,12 +114,11 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
if saved_path is not None:
plt.savefig(saved_path + title + ".png")
plt.show()
return 1
return saved_path + title + ".png"
def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
def plot_table_df(dataset, auc=None, title='untitled', X_label=None, y_label=None,
tab_df=None, plot_tab=True, saved_path=None):
print(tab_df)
'''
instructions : visualization of pivot with single dataframe
Params :
......@@ -132,7 +132,24 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
'''
fig, axs = plt.subplots(1, 1, figsize=(13, 9), linewidth=0.1)
if isinstance(dataset, pd.DataFrame):
table_rows = dataset.columns
else:
axs.plot(range(len(dataset.index)), dataset.values)
fig.subplots_adjust(bottom=0.2)
plt.grid()
if y_label is not None:
plt.ylabel(y_label)
if X_label is not None:
plt.xlabel(X_label)
# plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--')
plt.title(title)
if saved_path is not None:
plt.savefig(saved_path + title + ".png")
plt.show()
return saved_path + title + ".png"
table_cols = pd.Series(dataset.index).astype(str).map(lambda x: x.replace(' ', '')).map(
lambda x : x.replace('0.', '.'))
......@@ -140,7 +157,10 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
for i in range(len(table_rows)):
x = range(len(table_cols))
y = dataset.iloc[:, i]
if auc != None:
axs.plot(x, y, label=str(table_rows[i]) + ' AUC: ' + str(auc[i]))
else:
axs.plot(x, y, label=str(table_rows[i]))
# if table should be plot
if plot_tab:
if tab_df is None:
......@@ -158,7 +178,7 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
plt.xticks([])
# otherwise, nothing to do here
the_table.auto_set_font_size(False)
the_table.set_fontsize(9)
the_table.set_fontsize(8)
fig.subplots_adjust(bottom=0.2)
plt.grid()
if y_label is not None:
......@@ -171,7 +191,7 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
if saved_path is not None:
plt.savefig(saved_path + title + ".png")
plt.show()
return 1
return saved_path + title + ".png"
......
......@@ -22,8 +22,6 @@ score = 'score'
prediction = 'predict'
############################
# 备份df_sample
#df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx")
......@@ -61,15 +59,24 @@ predictions ,test_auc = lightgbm.predict(lgbm,df_test,dhb.features,target)
# 把新的预测结果加入test
df_test[prediction] = predictions
####### allocator cache ############
applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'}
applied_type = {'1,2':'首贷','1,2,3':'全量客群','1':'首申','2':'复申','3':'复贷'}
####################################
### report
import os
os.chdir("E:/bla/model_mvp/")
# plot feature importance
topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/', topN=20)
topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/cache/', topN=20)
importanct_feat = pd.DataFrame({
'column': dhb.features,
'importance': lgbm.feature_importance(),
}).sort_values(by='importance',ascending=False).column.tolist()[:3]
# report file
report_path = "E:/bla/model_mvp/"
......@@ -91,21 +98,23 @@ document.add_picture(topnfeat_path)
document.add_paragraph('univar_chart')
# 遍历目标features画出univarchart
for i in dhb.features[:3]:
for i in importanct_feat:
univar_train = datacal.cal_univar(df_train, i, target, qcut=10)
univar_val = datacal.cal_univar(df_val, i, target, qcut=10)
univar_test = datacal.cal_univar(df_test, i, target, qcut=10)
tab_df_list = [univar_train,univar_val,univar_test]
univarChart = matplot.plot_table_list([univar_train,univar_val,univar_test], [1,2,3], datalist_description=None, title= i +' univar Chart', X_label=None, y_label=None,
tab_df_list=None, plot_tab=True,
tab_df_list=tab_df_list, plot_tab=False,
saved_path='./mvp/plots/cache/')
document.add_picture('./mvp/plots/cache/' + i +' univar Chart' + ".png")
document.add_picture(univarChart)
document.add_paragraph('PDP_chart')
# 遍历目标features 画出对应PDP
for i in dhb.features[:3]:
for i in importanct_feat:
pdp = datacal.cal_pdp(df=df_test, score=prediction, feature=i, qcut=10)
pdpChart = matplot.plot_table(pdp,title= i +' PDP Chart',saved_path='./mvp/plots/cache/')
document.add_picture('./mvp/plots/cache/' + i +' PDP Chart' + ".png")
pdpChart = matplot.plot_table_df(pdp, ['1'], title=i + ' PDP Chart', X_label=None, y_label=None,
tab_df=None, plot_tab=True, saved_path='./mvp/plots/cache/')
document.add_picture(pdpChart)
......@@ -113,13 +122,20 @@ filetool.saveDocument(document, report_path, report_name)
document.add_paragraph('lift_chart')
# 遍历给定渠道 & 客群 默认等频画出liftchart
for channel in applied_from:
for type in applied_type:
try:
for channel in ['333','159537','1,214,217,198']:
for type in ['1','2','3']:
df_sliced = df_test[df_test.applied_type.map(lambda x : True if str(x) in type.split(',') else False) & df_test.applied_from.map(lambda x : True if str(x) in channel.split(',') else False)]
lift = datacal.cal_liftchart(df_sliced,score=prediction)
liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache')
document.add_picture("./mvp/plots/cache" + i + ' lift Chart.png')
lift_pred = datacal.cal_lift(df_sliced,score=prediction)
lift_online = datacal.cal_lift(df_sliced,score=score)
#liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache/')
liftChart = matplot.plot_table_list([lift_pred, lift_online], [roc_auc_score(df_test[target],df_test[prediction]),roc_auc_score(df_test[target],df_test[score])], datalist_description=None,
title= applied_from[channel]+applied_type[type]+ ' lift Chart', X_label=None, y_label=None,
tab_df_list=tab_df_list, plot_tab=False,
saved_path='./mvp/plots/cache/')
document.add_picture(liftChart)
except:
pass
filetool.saveDocument(document, report_path, report_name)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment