Commit cb1c5b5d authored by 舒皓月's avatar 舒皓月

添加新功能 07 27

parent f0404627
tmp.py
test.py
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (data_test_work_space)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="pytest" />
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/model_monitor.iml" filepath="$PROJECT_DIR$/.idea/model_monitor.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="e1b3e57f-dd82-4187-916a-8212c6c521a7" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
<change afterPath="$PROJECT_DIR$/model_score.xlsx" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
<change beforePath="$PROJECT_DIR$/doc/image/31EA97A8-19B7-45c6-8302-4148D19BAABA.png" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/doc/image/C6640ABE-9017-42b5-A92A-2DE5601A15D8.png" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/model_monitor_PSI_AUC.py" beforeDir="false" afterPath="$PROJECT_DIR$/model_monitor_PSI_AUC.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/test.py" beforeDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileEditorManager">
<leaf>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/tmp.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-136">
<caret line="495" column="19" selection-start-line="495" selection-start-column="19" selection-end-line="495" selection-end-column="19" />
<folding>
<element signature="e#2742#2953#0" />
<element signature="e#2931#3547#0" />
<element signature="e#2931#3033#1" />
<element signature="e#3111#3382#0" />
<element signature="e#3411#3547#0" />
<element signature="e#3495#3547#0" />
<element signature="e#3599#4065#0" />
<element signature="e#3599#3718#1" />
<element signature="e#3744#4025#0" />
<element signature="e#4164#6209#0" />
<element signature="e#4164#4319#1" />
<element signature="e#4813#5476#0" />
<element signature="e#4993#5052#0" />
<element signature="e#5593#6156#0" />
<element signature="e#5624#5700#0" />
<element signature="e#5743#6156#0" />
<element signature="e#6069#6156#0" />
<element signature="e#7045#9518#0" />
<element signature="e#6308#6463#1" />
<element signature="e#7059#8337#0" />
<element signature="e#7249#7308#0" />
<element signature="e#7928#8198#0" />
<element signature="e#8243#8337#0" />
<element signature="e#8418#8702#0" />
<element signature="e#8457#8534#0" />
<element signature="e#9654#9805#0" />
<element signature="e#9351#9395#0" />
<element signature="e#9595#9607#0" />
<element signature="e#10489#10499#0" />
<element signature="e#11089#11353#0" />
<element signature="e#11135#11353#0" />
<element signature="e#11490#11717#0" />
<element signature="e#11819#11905#0" />
<element signature="e#12107#12803#0" />
<element signature="e#12277#12548#0" />
<element signature="e#12877#13384#0" />
<element signature="e#13413#13513#0" />
<element signature="e#13575#13737#0" />
<element signature="e#14089#14240#0" />
<element signature="e#14196#14240#0" />
<element signature="e#14440#14452#0" />
<element signature="e#15532#15542#0" />
<element signature="e#16138#16423#0" />
<element signature="e#16184#16423#0" />
<element signature="e#16560#16784#0" />
<element signature="e#16886#16972#0" />
<element signature="e#17176#18131#0" />
<element signature="e#17346#17876#0" />
<element signature="e#17465#17514#0" />
<element signature="e#18750#18852#0" />
<element signature="e#18958#19210#0" />
<element signature="e#20524#20613#0" />
<element signature="e#21073#21362#0" />
<element signature="e#21143#21284#0" />
<element signature="e#21923#22137#0" />
<element signature="e#21768#22011#0" />
<element signature="e#20784#21020#0" />
<element signature="e#22603#22608#0" />
<element signature="e#21479#21507#0" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="216">
<caret line="6" column="40" selection-start-line="6" selection-start-column="28" selection-end-line="6" selection-end-column="40" />
<folding>
<element signature="e#0#15#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/model_monitor_PSI_AUC.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="152">
<caret line="481" column="24" selection-start-line="481" selection-start-column="24" selection-end-line="481" selection-end-column="24" />
<folding>
<element signature="e#16#34#0" expanded="true" />
<element signature="e#509#2431#0" />
<element signature="e#2143#2314#0" />
<element signature="e#2471#2578#0" />
<element signature="e#2634#2768#0" />
<element signature="e#2804#2887#0" />
<element signature="e#2933#3549#0" />
<element signature="e#2933#3035#1" />
<element signature="e#3113#3384#0" />
<element signature="e#3413#3549#0" />
<element signature="e#3497#3549#0" />
<element signature="e#3601#4067#0" />
<element signature="e#3601#3720#1" />
<element signature="e#3746#4027#0" />
<element signature="e#4166#6211#0" />
<element signature="e#4166#4321#1" />
<element signature="e#4815#5478#0" />
<element signature="e#4995#5054#0" />
<element signature="e#5595#6158#0" />
<element signature="e#5626#5702#0" />
<element signature="e#5745#6158#0" />
<element signature="e#6071#6158#0" />
<element signature="e#6310#8760#0" />
<element signature="e#6310#6465#1" />
<element signature="e#7061#8339#0" />
<element signature="e#7251#7310#0" />
<element signature="e#7930#8200#0" />
<element signature="e#8245#8339#0" />
<element signature="e#8420#8704#0" />
<element signature="e#8459#8536#0" />
<element signature="e#8801#13386#0" />
<element signature="e#9246#9397#0" />
<element signature="e#9353#9397#0" />
<element signature="e#9431#9516#0" />
<element signature="e#9597#9609#0" />
<element signature="e#10491#10501#0" />
<element signature="e#11091#11355#0" />
<element signature="e#11137#11355#0" />
<element signature="e#11492#11719#0" />
<element signature="e#11821#11907#0" />
<element signature="e#12109#12805#0" />
<element signature="e#12279#12550#0" />
<element signature="e#12879#13386#0" />
<element signature="e#12969#13077#0" />
<element signature="e#13143#13307#0" />
<element signature="e#13427#18635#0" />
<element signature="e#14091#14242#0" />
<element signature="e#14198#14242#0" />
<element signature="e#14276#14361#0" />
<element signature="e#14442#14454#0" />
<element signature="e#15534#15544#0" />
<element signature="e#16140#16425#0" />
<element signature="e#16186#16425#0" />
<element signature="e#16562#16786#0" />
<element signature="e#16888#16974#0" />
<element signature="e#17178#18133#0" />
<element signature="e#17348#17878#0" />
<element signature="e#17467#17516#0" />
<element signature="e#18207#18635#0" />
<element signature="e#18218#18326#0" />
<element signature="e#18392#18556#0" />
<element signature="e#18742#19104#0" />
<element signature="e#19265#19320#0" />
<element signature="e#19452#19653#0" />
<element signature="e#19470#19652#0" />
<element signature="e#19894#19949#0" />
<element signature="e#20774#21074#0" />
<element signature="e#20811#21047#0" />
<element signature="e#21084#21089#0" />
<element signature="e#21487#21515#0" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/.gitignore" />
<option value="$PROJECT_DIR$/model_monitor_PSI_AUC.py" />
<option value="$PROJECT_DIR$/test.py" />
<option value="$PROJECT_DIR$/tmp.py" />
</list>
</option>
</component>
<component name="ProjectConfigurationFiles">
<option name="files">
<list>
<option value="$PROJECT_DIR$/.idea/model_monitor.iml" />
<option value="$PROJECT_DIR$/.idea/vcs.xml" />
<option value="$PROJECT_DIR$/.idea/misc.xml" />
<option value="$PROJECT_DIR$/.idea/modules.xml" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="310" />
<option name="y" value="135" />
<option name="width" value="1230" />
<option name="height" value="675" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="model_monitor" type="b2602c69:ProjectViewProjectNode" />
<item name="model_monitor" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager">
<configuration name="test" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="model_monitor" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/test.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.test" />
</list>
</recent_temporary>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="e1b3e57f-dd82-4187-916a-8212c6c521a7" name="Default Changelist" comment="" />
<created>1564209012222</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1564209012222</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-7" y="-7" width="1550" height="838" extended-state="6" />
<editor active="true" />
<layout>
<window_info id="Favorites" side_tool="true" />
<window_info content_ui="combo" id="Project" order="0" weight="0.24966975" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info anchor="bottom" id="Version Control" />
<window_info anchor="bottom" id="Python Console" />
<window_info anchor="bottom" id="Terminal" />
<window_info anchor="bottom" id="Event Log" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.49433428" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
</layout>
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/.gitignore">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="36">
<caret line="1" column="7" selection-start-line="1" selection-start-column="7" selection-end-line="1" selection-end-column="7" />
</state>
</provider>
</entry>
<entry file="file://$APPLICATION_HOME_DIR$/helpers/typeshed/stdlib/2and3/datetime.pyi">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="226">
<caret line="140" column="12" selection-start-line="140" selection-start-column="12" selection-end-line="140" selection-end-column="12" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/model_monitor_PSI_AUC.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="152">
<caret line="481" column="24" selection-start-line="481" selection-start-column="24" selection-end-line="481" selection-end-column="24" />
<folding>
<element signature="e#16#34#0" expanded="true" />
<element signature="e#509#2431#0" />
<element signature="e#2143#2314#0" />
<element signature="e#2471#2578#0" />
<element signature="e#2634#2768#0" />
<element signature="e#2804#2887#0" />
<element signature="e#2933#3549#0" />
<element signature="e#2933#3035#1" />
<element signature="e#3113#3384#0" />
<element signature="e#3413#3549#0" />
<element signature="e#3497#3549#0" />
<element signature="e#3601#4067#0" />
<element signature="e#3601#3720#1" />
<element signature="e#3746#4027#0" />
<element signature="e#4166#6211#0" />
<element signature="e#4166#4321#1" />
<element signature="e#4815#5478#0" />
<element signature="e#4995#5054#0" />
<element signature="e#5595#6158#0" />
<element signature="e#5626#5702#0" />
<element signature="e#5745#6158#0" />
<element signature="e#6071#6158#0" />
<element signature="e#6310#8760#0" />
<element signature="e#6310#6465#1" />
<element signature="e#7061#8339#0" />
<element signature="e#7251#7310#0" />
<element signature="e#7930#8200#0" />
<element signature="e#8245#8339#0" />
<element signature="e#8420#8704#0" />
<element signature="e#8459#8536#0" />
<element signature="e#8801#13386#0" />
<element signature="e#9246#9397#0" />
<element signature="e#9353#9397#0" />
<element signature="e#9431#9516#0" />
<element signature="e#9597#9609#0" />
<element signature="e#10491#10501#0" />
<element signature="e#11091#11355#0" />
<element signature="e#11137#11355#0" />
<element signature="e#11492#11719#0" />
<element signature="e#11821#11907#0" />
<element signature="e#12109#12805#0" />
<element signature="e#12279#12550#0" />
<element signature="e#12879#13386#0" />
<element signature="e#12969#13077#0" />
<element signature="e#13143#13307#0" />
<element signature="e#13427#18635#0" />
<element signature="e#14091#14242#0" />
<element signature="e#14198#14242#0" />
<element signature="e#14276#14361#0" />
<element signature="e#14442#14454#0" />
<element signature="e#15534#15544#0" />
<element signature="e#16140#16425#0" />
<element signature="e#16186#16425#0" />
<element signature="e#16562#16786#0" />
<element signature="e#16888#16974#0" />
<element signature="e#17178#18133#0" />
<element signature="e#17348#17878#0" />
<element signature="e#17467#17516#0" />
<element signature="e#18207#18635#0" />
<element signature="e#18218#18326#0" />
<element signature="e#18392#18556#0" />
<element signature="e#18742#19104#0" />
<element signature="e#19265#19320#0" />
<element signature="e#19452#19653#0" />
<element signature="e#19470#19652#0" />
<element signature="e#19894#19949#0" />
<element signature="e#20774#21074#0" />
<element signature="e#20811#21047#0" />
<element signature="e#21084#21089#0" />
<element signature="e#21487#21515#0" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/.PyCharmCE2019.1/system/python_stubs/1626812534/builtins.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="226">
<caret line="504" column="4" selection-start-line="504" selection-start-column="4" selection-end-line="504" selection-end-column="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="216">
<caret line="6" column="40" selection-start-line="6" selection-start-column="28" selection-end-line="6" selection-end-column="40" />
<folding>
<element signature="e#0#15#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tmp.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-136">
<caret line="495" column="19" selection-start-line="495" selection-start-column="19" selection-end-line="495" selection-end-column="19" />
<folding>
<element signature="e#2742#2953#0" />
<element signature="e#2931#3547#0" />
<element signature="e#2931#3033#1" />
<element signature="e#3111#3382#0" />
<element signature="e#3411#3547#0" />
<element signature="e#3495#3547#0" />
<element signature="e#3599#4065#0" />
<element signature="e#3599#3718#1" />
<element signature="e#3744#4025#0" />
<element signature="e#4164#6209#0" />
<element signature="e#4164#4319#1" />
<element signature="e#4813#5476#0" />
<element signature="e#4993#5052#0" />
<element signature="e#5593#6156#0" />
<element signature="e#5624#5700#0" />
<element signature="e#5743#6156#0" />
<element signature="e#6069#6156#0" />
<element signature="e#7045#9518#0" />
<element signature="e#6308#6463#1" />
<element signature="e#7059#8337#0" />
<element signature="e#7249#7308#0" />
<element signature="e#7928#8198#0" />
<element signature="e#8243#8337#0" />
<element signature="e#8418#8702#0" />
<element signature="e#8457#8534#0" />
<element signature="e#9654#9805#0" />
<element signature="e#9351#9395#0" />
<element signature="e#9595#9607#0" />
<element signature="e#10489#10499#0" />
<element signature="e#11089#11353#0" />
<element signature="e#11135#11353#0" />
<element signature="e#11490#11717#0" />
<element signature="e#11819#11905#0" />
<element signature="e#12107#12803#0" />
<element signature="e#12277#12548#0" />
<element signature="e#12877#13384#0" />
<element signature="e#13413#13513#0" />
<element signature="e#13575#13737#0" />
<element signature="e#14089#14240#0" />
<element signature="e#14196#14240#0" />
<element signature="e#14440#14452#0" />
<element signature="e#15532#15542#0" />
<element signature="e#16138#16423#0" />
<element signature="e#16184#16423#0" />
<element signature="e#16560#16784#0" />
<element signature="e#16886#16972#0" />
<element signature="e#17176#18131#0" />
<element signature="e#17346#17876#0" />
<element signature="e#17465#17514#0" />
<element signature="e#18750#18852#0" />
<element signature="e#18958#19210#0" />
<element signature="e#20524#20613#0" />
<element signature="e#21073#21362#0" />
<element signature="e#21143#21284#0" />
<element signature="e#21923#22137#0" />
<element signature="e#21768#22011#0" />
<element signature="e#20784#21020#0" />
<element signature="e#22603#22608#0" />
<element signature="e#21479#21507#0" />
</folding>
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
......@@ -15,11 +15,21 @@
- Lift Chart
# 版本信息 - 新代码
# 版本信息
- V 0.0.1
基本完成对PSI和Lift Chart关于模型分在MongoDB的重写.
- V 0.0.2
- 删除last_month参数, 设定运行当天的前一天为最新日期, 往前的(num_month - 1)月的1号为起始日期, 最新日期往前45天(若设定passdue_day=15)那天为有响应的最新日期.
- 对PSI的计算, 时间跨度为(起始日期 --> 最新日期)
- 对AUC的计算, 时间跨度为(起始日期 --> 有响应最新日期)
- 添加对PSI和AUCR(后面月份相对基准月的AUC比率)的异常检测.
# VLM
......@@ -29,49 +39,44 @@
- 因为这两个指标的统计都需要用到模型分, 所以放到一起.
## 代码流程
## 计算流程
- 首先对需要计算的模型分, 在指定的统一时间跨度内进行数据抽取(在MySQL和MongoDB中). 包括如下一些主要字段:
- 模型分1, 模型分2, ...
- 订单号, 申请时间, 申请类型, 渠道类型, 逾期天数.
- 根据预定义逾期阈值, 得到是否逾期标签.
- 根据该模型分有记录的第一个月, 计算分箱规则(由模型分等频分箱区间, PSI在申请集上计算, AUC在放款集上计算).
- 分别对全样本, 首申/复申/复贷, 以及各达标客群(客群数量大于预设阈值), 计算每月统计信息:
- PSI:
- 样本量.
- 各分箱样本量
- 各分箱样本量占比
- 以该客群模型分有记录的第一个月为基准的PSI.
- Lift Chart:
- 样本量
- 各分箱样本量
- 各分箱逾期率
- AUC
- 以该客群模型分有记录的第一个月为基准的AUC ratio.
- 统计表格信息, 方便筛选排序查看.
- 包含以下字段:
- 模型分名称.
- a月样本量, b月样本量...
- a月PSI, b月PSI...
- a月AUC, b月AUC...
- NOTE:
- 当某月样本量很小, 或者没有样本时, 标记为NaN. 对应的PSI, AUC也为NaN.
- 当某月样本量比较小, 导致PSI, AUC计算异常(如某些分箱没有样本, 全为非逾期样本), 则标记为-999.
- 基准月的PSI为0.
- 示例:
![PSI](doc/image/C6640ABE-9017-42b5-A92A-2DE5601A15D8.png)
![](doc/image/31EA97A8-19B7-45c6-8302-4148D19BAABA.png)
## 代码使用方法
- 分别对全样本, 首申/复申/复贷, 以及各达标客群(客群数量大于预设阈值), 计算每月统计信息.
- PSI统计信息:
- 样本量.
- 各分箱样本量
- 各分箱样本量占比
- 以该客群模型分有记录的第一个月为基准的PSI.
- Lift Chart统计信息:
- 样本量
- 各分箱样本量
- 各分箱逾期率
- AUC
- 以该客群模型分有记录的第一个月为基准的AUC ratio.
- 统计表格信息, 方便筛选排序查看, 包含以下字段:
- 模型分名称.
- a月样本量, b月样本量...
- a月PSI, b月PSI...
- a月AUC, b月AUC...
- 某个客群是否异常(AUC明显下降, PSI较大).
- NOTE:
- 当某月样本量很小, 或者没有样本时, 标记为NaN. 对应的PSI, AUC也为NaN.
- 当某月样本量比较小, 导致PSI, AUC计算异常(如某些分箱没有样本, 全为非逾期样本), 则标记为-999.
- 基准月的PSI为0.
## 使用方法
- 准备一个Excel表格, 其中放置模型分名称, 以及对应的在数据库中的字段名.
- 创建一个模型监控对象(这样你就有对象了).
- 创建一个模型监控对象(这样我们就有对象了^v^).
```python
mm = ModelMonitor(excel_path='./model_score.xlsx', save_path='./image/', last_month=7, num_month=4, min_user_group=200)
mm = ModelMonitor(excel_path='./model_score.xlsx', save_path='./image/', num_month=4, min_user_group=500, max_psi=0.1, min_aucr=0.8)
```
- excel_path: Excel文件路径.
......@@ -80,19 +85,17 @@
不用自己再另外手动创建文件夹, 代码会判断文件夹是否存在并创建.
- last_month: 想要统计的最后一个月.
- num_month: 想要统计几个月.
如last_month=7, num_month=3, 表示统计4, 5, 6三个月的信息.
NOTE: AUC的计算逻辑为了保证样本有响应, 在此基础上还要往前推一个月, 会统计4, 5月的信息.
- num_month: 想要统计近期几个月的数据. 统计截止到程序运行当天的前一天: 如运行日期为7月1日, 则不包含7月数据; 如运行日期为7月10日, 则将7月1日至9日这9天的数据算作7月数据.
- min_user_group: 最小客群数量.
这个参数越大, 颗粒越粗, 最后的统计图(客群数量)会越少.
反之颗粒越小, 最后统计图会越多.
- max_psi: 最大PSI, 大于则视为该客群异常.
- min_aucr: 最小AUCR, 小于则视为该客群异常.
- 执行run函数.
......@@ -100,19 +103,20 @@
mm.run()
```
- 输出
- 输出:
- 图片保存在./image中.
- PSI: ./image/PSI
- Lift Chart: ./image/AUC
- 统计信息.
- PSI统计信息: ./psi_info.csv
- AUC统计信息: ./auc_info.csv
- 图片保存在./image中.
- PSI: ./image/PSI
- Lift Chart: ./image/AUC
- 统计信息.
- PSI统计信息: ./psi_info.csv
- AUC统计信息: ./auc_info.csv
# TODO
- 添加对存在MySQL中模型分计算PSI, AUC的代码.
- 完成对VLM的重写.
- 部分(量信分, app模型)模型分报错, 进一步与模型维护者交流, 看是否字段名或者其它地方有问题.
# 贡献
......
......@@ -19,8 +19,8 @@ from collections import OrderedDict
class ModelMonitor:
def __init__(self, excel_path='../model_score.xlsx', sheet_name='mongo_model',
passdue_day=15, save_path='../image/',
def __init__(self, excel_path='./model_score.xlsx', sheet_name='mongo_model',
passdue_day=15, save_path='./image/',
last_month=7, num_month=4, min_user_group=500):
# 考虑到数据库配置基本不变, 所以不设置创建对象时对应输入变量.
......@@ -426,28 +426,27 @@ class ModelMonitor:
def run(self):
# 获取MySQL数据, 取last_month往前num_month个月数据.
# self.mysql_df = self.sql_query('''SELECT order_no, applied_at,
# applied_type, applied_from, applied_channel, transacted, passdue_day
# FROM risk_analysis
# WHERE applied_at > "2019-%s-01 00:00:00"
# AND applied_at < "2019-%s-01 00:00:00"'''
# % (self.int2str(self.last_month - self.num_month), self.int2str(self.last_month)))
# print('MySQL数据获取成功.')
self.mysql_df = self.sql_query('''SELECT order_no, applied_at,
applied_type, applied_from, applied_channel, transacted, passdue_day
FROM risk_analysis
WHERE applied_at > "2019-%s-01 00:00:00"
AND applied_at < "2019-%s-01 00:00:00"'''
% (self.int2str(self.last_month - self.num_month), self.int2str(self.last_month)))
print('MySQL数据获取成功.')
# self.mysql_df.to_csv('./mysql_data.csv', index=False)
self.mysql_df = pd.read_csv('./mysql_data.csv')
# self.mysql_df = pd.read_csv('./mysql_data.csv')
# 获取MongoDB数据, 取last_month往前num_month个月数据.
# condition = {'wf_created_at': {'$gte': '2019-%s-01 00:00:00' % self.int2str(self.last_month - self.num_month),
# '$lte': '2019-%s-01 00:00:00' % self.int2str(self.last_month)}}
# fields = {'wf_biz_no': 1, 'wf_created_at': 1}
# for f in self.model_feild_list: # 加入Excel中预置的模型分名称
# fields[f] = 1
# self.mongo_df = self.mongo_query(condition, fields)
# print('MongoDB数据获取成功.')
# self.mongo_df.to_csv('./mongo_data.csv', index=False)
condition = {'wf_created_at': {'$gte': '2019-%s-01 00:00:00' % self.int2str(self.last_month - self.num_month),
'$lte': '2019-%s-01 00:00:00' % self.int2str(self.last_month)}}
fields = {'wf_biz_no': 1, 'wf_created_at': 1}
for f in self.model_feild_list: # 加入Excel中预置的模型分名称
fields[f] = 1
self.mongo_df = self.mongo_query(condition, fields)
print('MongoDB数据获取成功.')
self.mongo_df = pd.read_csv('./mongo_data.csv')
# self.mongo_df.to_csv('./mongo_data.csv', index=False)
# self.mongo_df = pd.read_csv('./mongo_data.csv')
# MySQL数据去重.
self.mysql_df = self.mysql_df.sort_values('passdue_day')
......@@ -459,8 +458,27 @@ class ModelMonitor:
left_on='order_no', right_on='wf_biz_no', how='left')
## 定义逾期用户.
self.merge_data['overdue'] = self.merge_data['passdue_day'] > self.passdue_day
# 清洗数据.
def clean_data(data):
    """Coerce a raw model-score cell to ``float``.

    Args:
        data: A single cell value (number, string, ``None``, ...).

    Returns:
        ``float(data)`` when the conversion succeeds, otherwise ``np.nan``
        so that unparseable cells are treated as missing values.
    """
    try:
        return float(data)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are expected here; a bare ``except``
        # would also swallow KeyboardInterrupt / SystemExit.
        return np.nan
na_field_list = []
for field in self.model_feild_list:
self.merge_data[field] = self.merge_data[field].astype('float')
if field in self.merge_data.columns.tolist():
print('正在清洗%s' % self.model_feild_name_dict[field])
self.merge_data[field] = self.merge_data[field].apply(clean_data)
else:
na_field_list.append(field)
## 去除因为一些原因未抽取到的字段.
print('不包含以下字段:')
for field in na_field_list:
self.model_feild_list.remove(field)
self.model_name_list.remove(self.model_feild_name_dict[field])
del self.model_feild_name_dict[field]
print(self.model_feild_name_dict[field])
print('数据拼接完成.')
# 数据按月划分.
......@@ -489,6 +507,6 @@ class ModelMonitor:
print('统计信息保存成功.')
if __name__ == '__main__':
pass
mm = ModelMonitor(excel_path='./model_score.xlsx', save_path='./image/', last_month=7, num_month=2)
# if __name__ == '__main__':
# pass
# mm = ModelMonitor(excel_path='./model_score.xlsx', save_path='./image/', last_month=7, num_month=2)
class Solution:
    """Helper for finding the longest contiguous subarray with a given sum."""

    def __init__(self):
        pass

    def find_max_length(self, array, k):
        """Return the length of the longest contiguous subarray summing to ``k``.

        Args:
            array: Sequence of numbers (may contain negatives or zeros).
            k: Target sum.

        Returns:
            The maximum length of a contiguous slice of ``array`` whose
            elements add up to ``k``; ``0`` when ``array`` is empty or no
            such slice exists.
        """
        if not array:
            return 0

        # Maps each prefix sum to the earliest index at which it occurs.
        # The empty prefix (sum 0) is placed at index -1 so that slices
        # starting at position 0 are handled uniformly.
        first_seen = {0: -1}
        running_sum = 0
        max_length = 0
        for index, value in enumerate(array):
            running_sum += value
            # A slice ending at ``index`` sums to k exactly when an earlier
            # prefix sum equals running_sum - k.
            start = first_seen.get(running_sum - k)
            if start is not None:
                max_length = max(max_length, index - start)
            # Keep only the earliest occurrence to maximise slice length.
            first_seen.setdefault(running_sum, index)
        return max_length
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment