Commit a7f74ece authored by 舒皓月's avatar 舒皓月

... 2019 07 28

parent 14de033a
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="e1b3e57f-dd82-4187-916a-8212c6c521a7" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/model_monitor_PSI_AUC.py" beforeDir="false" afterPath="$PROJECT_DIR$/model_monitor_PSI_AUC.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tmp.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="72">
<caret line="505" column="9" selection-start-line="505" selection-start-column="9" selection-end-line="505" selection-end-column="9" />
<folding>
<element signature="e#632#3545#0" />
<element signature="e#2837#3048#0" />
<element signature="e#3336#3551#0" />
<element signature="e#3336#3435#1" />
<element signature="e#3607#3880#0" />
<element signature="e#3607#3737#1" />
<element signature="e#3916#4115#0" />
<element signature="e#3916#4023#1" />
<element signature="e#4161#4777#0" />
<element signature="e#4161#4263#1" />
<element signature="e#4341#4612#0" />
<element signature="e#4641#4777#0" />
<element signature="e#4725#4777#0" />
<element signature="e#4829#5295#0" />
<element signature="e#4829#4948#1" />
<element signature="e#4974#5255#0" />
<element signature="e#5390#5525#1" />
<element signature="e#7372#9721#0" />
<element signature="e#7372#7507#1" />
<element signature="e#8609#9986#0" />
<element signature="e#8236#8291#0" />
<element signature="e#8913#9147#0" />
<element signature="e#9184#9274#0" />
<element signature="e#9347#9673#0" />
<element signature="e#9382#9455#0" />
<element signature="e#9490#9673#0" />
<element signature="e#10198#10349#0" />
<element signature="e#10363#10407#0" />
<element signature="e#10383#10577#0" />
<element signature="e#10658#10670#0" />
<element signature="e#11551#11561#0" />
<element signature="e#12151#12436#0" />
<element signature="e#12197#12436#0" />
<element signature="e#12542#12815#0" />
<element signature="e#12917#13003#0" />
<element signature="e#13207#13955#0" />
<element signature="e#13992#14437#0" />
<element signature="e#14193#14203#0" />
<element signature="e#14029#14521#0" />
<element signature="e#14119#14215#0" />
<element signature="e#14281#14442#0" />
<element signature="e#14562#20471#0" />
<element signature="e#15032#15255#0" />
<element signature="e#15282#15547#0" />
<element signature="e#15670#15821#0" />
<element signature="e#15777#15821#0" />
<element signature="e#15855#16049#0" />
<element signature="e#16130#16142#0" />
<element signature="e#17221#17231#0" />
<element signature="e#17827#18112#0" />
<element signature="e#17873#18112#0" />
<element signature="e#18218#18510#0" />
<element signature="e#18612#18698#0" />
<element signature="e#18902#19890#0" />
<element signature="e#19072#19635#0" />
<element signature="e#19191#19240#0" />
<element signature="e#19486#19633#0" />
<element signature="e#19964#20471#0" />
<element signature="e#19975#20071#0" />
<element signature="e#20137#20391#0" />
<element signature="e#21094#25008#0" />
<element signature="e#21690#22301#0" />
<element signature="e#21184#21532#0" />
<element signature="e#22479#22998#0" />
<element signature="e#21846#21988#0" />
<element signature="e#21864#21987#0" />
<element signature="e#22777#22898#0" />
<element signature="e#23042#23131#0" />
<element signature="e#23216#23473#0" />
<element signature="e#23278#23411#0" />
<element signature="e#23576#23789#0" />
<element signature="e#23955#24198#0" />
<element signature="e#23992#24193#0" />
<element signature="e#24920#24925#0" />
<element signature="e#24946#24974#0" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="5" column="19" selection-start-line="5" selection-start-column="19" selection-end-line="5" selection-end-column="19" />
<folding>
<element signature="e#0#15#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/model_monitor_PSI_AUC.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="80">
<caret line="510" column="29" lean-forward="true" selection-start-line="510" selection-start-column="29" selection-end-line="510" selection-end-column="29" />
<folding>
<element signature="e#632#3598#0" />
<element signature="e#2883#3094#0" />
<element signature="e#3638#3853#0" />
<element signature="e#3638#3737#1" />
<element signature="e#3909#4182#0" />
<element signature="e#3909#4039#1" />
<element signature="e#4218#4417#0" />
<element signature="e#4218#4325#1" />
<element signature="e#4463#5079#0" />
<element signature="e#4463#4565#1" />
<element signature="e#4643#4914#0" />
<element signature="e#4943#5079#0" />
<element signature="e#5027#5079#0" />
<element signature="e#5131#5394#0" />
<element signature="e#5131#5242#1" />
<element signature="e#5268#5354#0" />
<element signature="e#5489#5624#1" />
<element signature="e#8037#10605#0" />
<element signature="e#8037#8172#1" />
<element signature="e#8723#10100#0" />
<element signature="e#8901#8956#0" />
<element signature="e#9578#9812#0" />
<element signature="e#9849#9939#0" />
<element signature="e#10173#10557#0" />
<element signature="e#10208#10339#0" />
<element signature="e#10374#10557#0" />
<element signature="e#11082#11272#0" />
<element signature="e#11189#11272#0" />
<element signature="e#11306#11500#0" />
<element signature="e#11581#11593#0" />
<element signature="e#12474#12484#0" />
<element signature="e#13074#13359#0" />
<element signature="e#13120#13359#0" />
<element signature="e#13465#13738#0" />
<element signature="e#13840#13926#0" />
<element signature="e#14173#15071#0" />
<element signature="e#14343#14816#0" />
<element signature="e#14343#14353#0" />
<element signature="e#15145#15637#0" />
<element signature="e#15235#15331#0" />
<element signature="e#15397#15558#0" />
<element signature="e#15678#21662#0" />
<element signature="e#16148#16371#0" />
<element signature="e#16398#16663#0" />
<element signature="e#16786#16976#0" />
<element signature="e#16893#16976#0" />
<element signature="e#17010#17204#0" />
<element signature="e#17285#17297#0" />
<element signature="e#18376#18386#0" />
<element signature="e#18982#19267#0" />
<element signature="e#19028#19267#0" />
<element signature="e#19373#19665#0" />
<element signature="e#19767#19853#0" />
<element signature="e#20093#21081#0" />
<element signature="e#20263#20826#0" />
<element signature="e#20382#20431#0" />
<element signature="e#20677#20824#0" />
<element signature="e#21155#21662#0" />
<element signature="e#21166#21262#0" />
<element signature="e#21328#21582#0" />
<element signature="e#22597#26725#0" />
<element signature="e#22689#23300#0" />
<element signature="e#22720#23084#0" />
<element signature="e#23478#23987#0" />
<element signature="e#23490#23636#0" />
<element signature="e#23508#23635#0" />
<element signature="e#24495#24616#0" />
<element signature="e#24760#24849#0" />
<element signature="e#24934#25191#0" />
<element signature="e#24996#25129#0" />
<element signature="e#25294#25507#0" />
<element signature="e#25673#25916#0" />
<element signature="e#25710#25911#0" />
<element signature="e#25926#25931#0" />
<element signature="e#25010#25038#0" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/.gitignore" />
<option value="$PROJECT_DIR$/test.py" />
<option value="$PROJECT_DIR$/tmp.py" />
<option value="$PROJECT_DIR$/model_monitor_PSI_AUC.py" />
</list>
</option>
</component>
<component name="ProjectConfigurationFiles">
<option name="files">
<list>
<option value="$PROJECT_DIR$/.idea/model_monitor.iml" />
<option value="$PROJECT_DIR$/.idea/vcs.xml" />
<option value="$PROJECT_DIR$/.idea/misc.xml" />
<option value="$PROJECT_DIR$/.idea/modules.xml" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="310" />
<option name="y" value="135" />
<option name="width" value="1230" />
<option name="height" value="675" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="model_monitor" type="b2602c69:ProjectViewProjectNode" />
<item name="model_monitor" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager">
<configuration name="test" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="model_monitor" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/test.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.test" />
</list>
</recent_temporary>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="e1b3e57f-dd82-4187-916a-8212c6c521a7" name="Default Changelist" comment="" />
<created>1564209012222</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1564209012222</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-7" y="-7" width="1550" height="838" extended-state="6" />
<editor active="true" />
<layout>
<window_info content_ui="combo" id="Project" order="0" weight="0.24966975" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.49433428" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="bottom" id="Version Control" order="7" />
<window_info anchor="bottom" id="Terminal" order="8" />
<window_info anchor="bottom" id="Event Log" order="9" side_tool="true" />
<window_info anchor="bottom" id="Python Console" order="10" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
</layout>
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/.gitignore">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="36">
<caret line="1" column="7" selection-start-line="1" selection-start-column="7" selection-end-line="1" selection-end-column="7" />
</state>
</provider>
</entry>
<entry file="file://$APPLICATION_HOME_DIR$/helpers/typeshed/stdlib/2and3/datetime.pyi">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="226">
<caret line="140" column="12" selection-start-line="140" selection-start-column="12" selection-end-line="140" selection-end-column="12" />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/.PyCharmCE2019.1/system/python_stubs/1626812534/builtins.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="226">
<caret line="504" column="4" selection-start-line="504" selection-start-column="4" selection-end-line="504" selection-end-column="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="5" column="19" selection-start-line="5" selection-start-column="19" selection-end-line="5" selection-end-column="19" />
<folding>
<element signature="e#0#15#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tmp.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="72">
<caret line="505" column="9" selection-start-line="505" selection-start-column="9" selection-end-line="505" selection-end-column="9" />
<folding>
<element signature="e#632#3545#0" />
<element signature="e#2837#3048#0" />
<element signature="e#3336#3551#0" />
<element signature="e#3336#3435#1" />
<element signature="e#3607#3880#0" />
<element signature="e#3607#3737#1" />
<element signature="e#3916#4115#0" />
<element signature="e#3916#4023#1" />
<element signature="e#4161#4777#0" />
<element signature="e#4161#4263#1" />
<element signature="e#4341#4612#0" />
<element signature="e#4641#4777#0" />
<element signature="e#4725#4777#0" />
<element signature="e#4829#5295#0" />
<element signature="e#4829#4948#1" />
<element signature="e#4974#5255#0" />
<element signature="e#5390#5525#1" />
<element signature="e#7372#9721#0" />
<element signature="e#7372#7507#1" />
<element signature="e#8609#9986#0" />
<element signature="e#8236#8291#0" />
<element signature="e#8913#9147#0" />
<element signature="e#9184#9274#0" />
<element signature="e#9347#9673#0" />
<element signature="e#9382#9455#0" />
<element signature="e#9490#9673#0" />
<element signature="e#10198#10349#0" />
<element signature="e#10363#10407#0" />
<element signature="e#10383#10577#0" />
<element signature="e#10658#10670#0" />
<element signature="e#11551#11561#0" />
<element signature="e#12151#12436#0" />
<element signature="e#12197#12436#0" />
<element signature="e#12542#12815#0" />
<element signature="e#12917#13003#0" />
<element signature="e#13207#13955#0" />
<element signature="e#13992#14437#0" />
<element signature="e#14193#14203#0" />
<element signature="e#14029#14521#0" />
<element signature="e#14119#14215#0" />
<element signature="e#14281#14442#0" />
<element signature="e#14562#20471#0" />
<element signature="e#15032#15255#0" />
<element signature="e#15282#15547#0" />
<element signature="e#15670#15821#0" />
<element signature="e#15777#15821#0" />
<element signature="e#15855#16049#0" />
<element signature="e#16130#16142#0" />
<element signature="e#17221#17231#0" />
<element signature="e#17827#18112#0" />
<element signature="e#17873#18112#0" />
<element signature="e#18218#18510#0" />
<element signature="e#18612#18698#0" />
<element signature="e#18902#19890#0" />
<element signature="e#19072#19635#0" />
<element signature="e#19191#19240#0" />
<element signature="e#19486#19633#0" />
<element signature="e#19964#20471#0" />
<element signature="e#19975#20071#0" />
<element signature="e#20137#20391#0" />
<element signature="e#21094#25008#0" />
<element signature="e#21690#22301#0" />
<element signature="e#21184#21532#0" />
<element signature="e#22479#22998#0" />
<element signature="e#21846#21988#0" />
<element signature="e#21864#21987#0" />
<element signature="e#22777#22898#0" />
<element signature="e#23042#23131#0" />
<element signature="e#23216#23473#0" />
<element signature="e#23278#23411#0" />
<element signature="e#23576#23789#0" />
<element signature="e#23955#24198#0" />
<element signature="e#23992#24193#0" />
<element signature="e#24920#24925#0" />
<element signature="e#24946#24974#0" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/model_monitor_PSI_AUC.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="80">
<caret line="510" column="29" lean-forward="true" selection-start-line="510" selection-start-column="29" selection-end-line="510" selection-end-column="29" />
<folding>
<element signature="e#632#3598#0" />
<element signature="e#2883#3094#0" />
<element signature="e#3638#3853#0" />
<element signature="e#3638#3737#1" />
<element signature="e#3909#4182#0" />
<element signature="e#3909#4039#1" />
<element signature="e#4218#4417#0" />
<element signature="e#4218#4325#1" />
<element signature="e#4463#5079#0" />
<element signature="e#4463#4565#1" />
<element signature="e#4643#4914#0" />
<element signature="e#4943#5079#0" />
<element signature="e#5027#5079#0" />
<element signature="e#5131#5394#0" />
<element signature="e#5131#5242#1" />
<element signature="e#5268#5354#0" />
<element signature="e#5489#5624#1" />
<element signature="e#8037#10605#0" />
<element signature="e#8037#8172#1" />
<element signature="e#8723#10100#0" />
<element signature="e#8901#8956#0" />
<element signature="e#9578#9812#0" />
<element signature="e#9849#9939#0" />
<element signature="e#10173#10557#0" />
<element signature="e#10208#10339#0" />
<element signature="e#10374#10557#0" />
<element signature="e#11082#11272#0" />
<element signature="e#11189#11272#0" />
<element signature="e#11306#11500#0" />
<element signature="e#11581#11593#0" />
<element signature="e#12474#12484#0" />
<element signature="e#13074#13359#0" />
<element signature="e#13120#13359#0" />
<element signature="e#13465#13738#0" />
<element signature="e#13840#13926#0" />
<element signature="e#14173#15071#0" />
<element signature="e#14343#14816#0" />
<element signature="e#14343#14353#0" />
<element signature="e#15145#15637#0" />
<element signature="e#15235#15331#0" />
<element signature="e#15397#15558#0" />
<element signature="e#15678#21662#0" />
<element signature="e#16148#16371#0" />
<element signature="e#16398#16663#0" />
<element signature="e#16786#16976#0" />
<element signature="e#16893#16976#0" />
<element signature="e#17010#17204#0" />
<element signature="e#17285#17297#0" />
<element signature="e#18376#18386#0" />
<element signature="e#18982#19267#0" />
<element signature="e#19028#19267#0" />
<element signature="e#19373#19665#0" />
<element signature="e#19767#19853#0" />
<element signature="e#20093#21081#0" />
<element signature="e#20263#20826#0" />
<element signature="e#20382#20431#0" />
<element signature="e#20677#20824#0" />
<element signature="e#21155#21662#0" />
<element signature="e#21166#21262#0" />
<element signature="e#21328#21582#0" />
<element signature="e#22597#26725#0" />
<element signature="e#22689#23300#0" />
<element signature="e#22720#23084#0" />
<element signature="e#23478#23987#0" />
<element signature="e#23490#23636#0" />
<element signature="e#23508#23635#0" />
<element signature="e#24495#24616#0" />
<element signature="e#24760#24849#0" />
<element signature="e#24934#25191#0" />
<element signature="e#24996#25129#0" />
<element signature="e#25294#25507#0" />
<element signature="e#25673#25916#0" />
<element signature="e#25710#25911#0" />
<element signature="e#25926#25931#0" />
<element signature="e#25010#25038#0" />
</folding>
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
...@@ -23,7 +23,8 @@ warnings.filterwarnings('ignore') ...@@ -23,7 +23,8 @@ warnings.filterwarnings('ignore')
class ModelMonitor: class ModelMonitor:
def __init__(self, excel_path='./model_score.xlsx', sheet_name='mongo_model', def __init__(self, excel_path='./model_score.xlsx', sheet_name='mongo_model',
passdue_day=15, save_path='./image/', passdue_day=15, save_path='./image/',
num_month=4, min_user_group=500, max_psi=0.1, min_aucr=0.85): num_month=4, min_user_group=500, max_psi=0.1, min_aucr=0.85,
if_save=True, if_load=False):
# 考虑到数据库配置基本不变, 所以不设置创建对象时对应输入变量. # 考虑到数据库配置基本不变, 所以不设置创建对象时对应输入变量.
self.mysql_engine = pymysql.connect(host='172.20.6.9', self.mysql_engine = pymysql.connect(host='172.20.6.9',
...@@ -82,6 +83,13 @@ class ModelMonitor: ...@@ -82,6 +83,13 @@ class ModelMonitor:
self.na_enough_data_psi_set = set() # 一些新的模型没有足够数据用于统计. self.na_enough_data_psi_set = set() # 一些新的模型没有足够数据用于统计.
self.na_enough_data_auc_set = set() # 一些新的模型没有足够数据用于统计. self.na_enough_data_auc_set = set() # 一些新的模型没有足够数据用于统计.
self.filed_bench_bins_ratio = None # 每个模型分在总体样本上第一个月的分箱比例.
self.filed_bench_auc = None # 每个模型分在总体样本上第一个月的AUC
# 程序数据读写模式.
self.if_save = if_save # 是否保存从数据库抽取的数据.
self.if_load = if_load # 是否从保存的数据加载数据, 而不从数据库读取.
def sql_query(self, sql): def sql_query(self, sql):
''' '''
连接MySQL数据库, 根据SQL返回数据. 连接MySQL数据库, 根据SQL返回数据.
...@@ -140,16 +148,11 @@ class ModelMonitor: ...@@ -140,16 +148,11 @@ class ModelMonitor:
def calc_psi(self, array_1, array_2): def calc_psi(self, array_1, array_2):
''' '''
计算PSI. 计算PSI.
:param array_1: pd.Series :param array_1: array
:param array_2: pd.Series :param array_2: array
:return: PSI :return: PSI
''' '''
try: try:
# print(array_2)
array_1 = array_1.values
array_2 = array_2.values
array_1 = array_1 / array_1.sum()
array_2 = array_2 / array_2.sum()
psi = ((array_1 - array_2) * np.log10(array_1 / array_2)).sum() psi = ((array_1 - array_2) * np.log10(array_1 / array_2)).sum()
return psi return psi
except: except:
...@@ -174,24 +177,33 @@ class ModelMonitor: ...@@ -174,24 +177,33 @@ class ModelMonitor:
df_g = df_g.sort_values(['month_label', 'bins']) df_g = df_g.sort_values(['month_label', 'bins'])
for i, m in enumerate(month_list): for i, m in enumerate(month_list):
amt_in_bins = df_g.loc[df_g['month_label'] == m, field].values amt_in_bins = df_g.loc[df_g['month_label'] == m, field].values
# 某月样本量小于阈值, 放弃记录信息. ## 某月样本量小于阈值, 放弃记录信息.
if amt_in_bins.sum() < self.min_user_group: if amt_in_bins.sum() < self.min_user_group:
print('%d月样本量过小, 放弃提取信息.' % m) print('%d月样本量过小, 放弃提取信息.' % m)
continue continue
info_dict[user_group_name][str(m) + '月'] = {} info_dict[user_group_name][str(m) + '月'] = {}
info_dict[user_group_name][str(m) + '月']['该月样本量'] = amt_in_bins.sum() info_dict[user_group_name][str(m) + '月']['该月样本量'] = amt_in_bins.sum()
info_dict[user_group_name][str(m) + '月']['各分箱样本量'] = amt_in_bins info_dict[user_group_name][str(m) + '月']['各分箱样本量'] = amt_in_bins
info_dict[user_group_name][str(m) + '月']['各分箱样本占比'] = amt_in_bins / amt_in_bins.sum() info_dict[user_group_name][str(m) + '月']['各分箱样本占比'] = np.array([x[0] for x in amt_in_bins / amt_in_bins.sum()])
## 若为某模型分全样本, 则记录分箱比例.
if user_group_name == '全样本' and self.filed_bench_bins_ratio is None:
self.filed_bench_bins_ratio = info_dict[user_group_name][str(m) + '月']['各分箱样本占比']
print('%d月样本量: %d' % (m, info_dict[user_group_name][str(m) + '月']['该月样本量'])) print('%d月样本量: %d' % (m, info_dict[user_group_name][str(m) + '月']['该月样本量']))
# 计算PSI, 以样本量达标的第一个月为基准. # 计算PSI, 以样本量达标的第一个月为基准.
for i, m in enumerate(info_dict[user_group_name]): for i, m in enumerate(info_dict[user_group_name]):
if i == 0: if i == 0:
info_dict[user_group_name][m]['psi'] = 0 if user_group_name == '全样本':
bench_month = m psi = 0
else: else:
psi = self.calc_psi( psi = self.calc_psi(self.filed_bench_bins_ratio, info_dict[user_group_name][m]['各分箱样本占比'])
df_g.loc[df_g['month_label'] == int(bench_month[0]), field], if psi is not None:
df_g.loc[df_g['month_label'] == int(m[0]), field]) info_dict[user_group_name][m]['psi'] = psi
else:
info_dict[user_group_name][m]['psi'] = -999
print('计算PSI出现错误.')
bench_bins_ratio = info_dict[user_group_name][m]['各分箱样本占比']
else:
psi = self.calc_psi(bench_bins_ratio, info_dict[user_group_name][m]['各分箱样本占比'])
if psi: if psi:
info_dict[user_group_name][m]['psi'] = psi info_dict[user_group_name][m]['psi'] = psi
else: else:
...@@ -240,10 +252,12 @@ class ModelMonitor: ...@@ -240,10 +252,12 @@ class ModelMonitor:
except: except:
print('AUC计算发生错误.') print('AUC计算发生错误.')
info_dict[user_group_name][str(m) + '月']['auc'] = -999 info_dict[user_group_name][str(m) + '月']['auc'] = -999
if user_group_name == '全样本' and self.filed_bench_auc is None:
self.filed_bench_auc = info_dict[user_group_name][str(m) + '月']['auc']
for i, m in enumerate(info_dict[user_group_name]): for i, m in enumerate(info_dict[user_group_name]):
if i == 0: # 基准月. if i == 0: # 基准月.
info_dict[user_group_name][m]['aucR'] = 1 info_dict[user_group_name][m]['aucR'] = info_dict[user_group_name][m]['auc'] / self.filed_bench_auc
bench_month = m bench_month = m
else: else:
info_dict[user_group_name][m]['aucR'] = info_dict[user_group_name][m]['auc'] / \ info_dict[user_group_name][m]['aucR'] = info_dict[user_group_name][m]['auc'] / \
...@@ -265,7 +279,7 @@ class ModelMonitor: ...@@ -265,7 +279,7 @@ class ModelMonitor:
for m in range(self.first_month, self.current_month + 1): for m in range(self.first_month, self.current_month + 1):
bins = self.make_bin(df_copy.loc[df_copy['month_label'] == m, field]) bins = self.make_bin(df_copy.loc[df_copy['month_label'] == m, field])
if bins: if bins:
print('%d月为基准月.' % m) print('%s以%d月为基准月.' % (self.model_feild_name_dict[field], m))
break break
if not bins: if not bins:
self.na_enough_data_psi_set.add(self.model_feild_name_dict[field]) self.na_enough_data_psi_set.add(self.model_feild_name_dict[field])
...@@ -326,6 +340,7 @@ class ModelMonitor: ...@@ -326,6 +340,7 @@ class ModelMonitor:
remove_list.append(user_group_name) remove_list.append(user_group_name)
for user_group_name in remove_list: for user_group_name in remove_list:
del info_dict[user_group_name] del info_dict[user_group_name]
self.filed_bench_bins_ratio = None
# 画图. # 画图.
print('开始画图.') print('开始画图.')
...@@ -334,8 +349,11 @@ class ModelMonitor: ...@@ -334,8 +349,11 @@ class ModelMonitor:
print(self.model_feild_name_dict[field] + '-' + user_group_name) print(self.model_feild_name_dict[field] + '-' + user_group_name)
plt.figure(figsize=(16, 8)) plt.figure(figsize=(16, 8))
for m in info_dict[user_group_name]: for m in info_dict[user_group_name]:
# print(m)
# print(info_dict[user_group_name][m]['psi'])
# print(info_dict[user_group_name][m]['该月样本量'])
plt.plot(range(len(info_dict[user_group_name][m]['各分箱样本占比'])), plt.plot(range(len(info_dict[user_group_name][m]['各分箱样本占比'])),
[round(x[0], 3) for x in info_dict[user_group_name][m]['各分箱样本占比']], [round(x, 3) for x in info_dict[user_group_name][m]['各分箱样本占比']],
label='%s PSI: %.3f \n 样本量: %d' % ( label='%s PSI: %.3f \n 样本量: %d' % (
m, info_dict[user_group_name][m]['psi'], info_dict[user_group_name][m]['该月样本量'])) m, info_dict[user_group_name][m]['psi'], info_dict[user_group_name][m]['该月样本量']))
plt.legend(loc='upper right') plt.legend(loc='upper right')
...@@ -376,7 +394,7 @@ class ModelMonitor: ...@@ -376,7 +394,7 @@ class ModelMonitor:
for m in range(self.first_month, self.response_month + 1): for m in range(self.first_month, self.response_month + 1):
bins = self.make_bin(df_copy.loc[df_copy['month_label'] == m, field]) bins = self.make_bin(df_copy.loc[df_copy['month_label'] == m, field])
if bins: if bins:
print('%d月为基准月.' % m) print('%s以%d月为基准月.' % (self.model_feild_name_dict[field], m))
break break
if not bins: if not bins:
self.na_enough_data_auc_set.add(self.model_feild_name_dict[field]) self.na_enough_data_auc_set.add(self.model_feild_name_dict[field])
...@@ -441,6 +459,7 @@ class ModelMonitor: ...@@ -441,6 +459,7 @@ class ModelMonitor:
remove_list.append(user_group_name) remove_list.append(user_group_name)
for user_group_name in remove_list: for user_group_name in remove_list:
del info_dict[user_group_name] del info_dict[user_group_name]
self.filed_bench_auc = None
# 画图. # 画图.
print('开始画图.') print('开始画图.')
...@@ -476,8 +495,16 @@ class ModelMonitor: ...@@ -476,8 +495,16 @@ class ModelMonitor:
def abnormal_psi(self): def abnormal_psi(self):
def is_abnormal_psi(data): def is_abnormal_psi(data):
for i in data.index: first = True
if 'PSI' in i and data[i] > self.max_psi: for idx in data.index:
if 'PSI' in idx and pd.notna(data[idx]):
if first:
if data[idx] > self.max_psi * 5:
return True
else:
first = False
else:
if data[idx] > self.max_psi:
return True return True
return False return False
...@@ -494,6 +521,7 @@ class ModelMonitor: ...@@ -494,6 +521,7 @@ class ModelMonitor:
def run(self): def run(self):
# 获取MySQL数据, 取近期num_month个月数据(如今天7.27, 则这27天算进7月). # 获取MySQL数据, 取近期num_month个月数据(如今天7.27, 则这27天算进7月).
if not self.if_load:
self.mysql_df = self.sql_query('''SELECT order_no, applied_at, self.mysql_df = self.sql_query('''SELECT order_no, applied_at,
applied_type, applied_from, applied_channel, transacted, passdue_day applied_type, applied_from, applied_channel, transacted, passdue_day
FROM risk_analysis FROM risk_analysis
...@@ -501,10 +529,13 @@ class ModelMonitor: ...@@ -501,10 +529,13 @@ class ModelMonitor:
AND applied_at <= "%s 00:00:00"''' AND applied_at <= "%s 00:00:00"'''
% (self.first_date, self.current_date)) % (self.first_date, self.current_date))
print('MySQL数据获取成功.') print('MySQL数据获取成功.')
# self.mysql_df.to_csv('./mysql_data.csv', index=False) if self.if_save:
# self.mysql_df = pd.read_csv('./mysql_data.csv') self.mysql_df.to_csv('./mysql_data.csv', index=False)
else:
self.mysql_df = pd.read_csv('./mysql_data.csv')
# 获取MongoDB数据, 取近期num_month个月数据(如今天7.27, 则这27天算进7月). # 获取MongoDB数据, 取近期num_month个月数据(如今天7.27, 则这27天算进7月).
if not self.if_load:
condition = {'wf_created_at': {'$gte': '%s 00:00:00' % self.first_date, condition = {'wf_created_at': {'$gte': '%s 00:00:00' % self.first_date,
'$lte': '%s 00:00:00' % self.current_date}} '$lte': '%s 00:00:00' % self.current_date}}
fields = {'wf_biz_no': 1, 'wf_created_at': 1} fields = {'wf_biz_no': 1, 'wf_created_at': 1}
...@@ -512,9 +543,10 @@ class ModelMonitor: ...@@ -512,9 +543,10 @@ class ModelMonitor:
fields[f] = 1 fields[f] = 1
self.mongo_df = self.mongo_query(condition, fields) self.mongo_df = self.mongo_query(condition, fields)
print('MongoDB数据获取成功.') print('MongoDB数据获取成功.')
if self.if_save:
# self.mongo_df.to_csv('./mongo_data.csv', index=False) self.mongo_df.to_csv('./mongo_data.csv', index=False)
# self.mongo_df = pd.read_csv('./mongo_data.csv') else:
self.mongo_df = pd.read_csv('./mongo_data.csv')
# MySQL数据去重. # MySQL数据去重.
self.mysql_df = self.mysql_df.sort_values('passdue_day') self.mysql_df = self.mysql_df.sort_values('passdue_day')
...@@ -567,15 +599,14 @@ class ModelMonitor: ...@@ -567,15 +599,14 @@ class ModelMonitor:
# 画图. # 画图.
## AUC.
print('开始画图-AUC.')
for field in self.model_feild_list:
self.plot_auc(field)
## PSI. ## PSI.
print('开始画图-PSI.') print('开始画图-PSI.')
for field in self.model_feild_list: for field in self.model_feild_list:
self.plot_psi(field) self.plot_psi(field)
## AUC.
print('开始画图-AUC.')
for field in self.model_feild_list:
self.plot_auc(field)
# 输出数据不足的模型. # 输出数据不足的模型.
print('PSI 数据不足以统计的模型:') print('PSI 数据不足以统计的模型:')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment