Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_monitoring_monthly
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_monitoring_monthly
Commits
edc7499b
Commit
edc7499b
authored
Jul 27, 2019
by
舒皓月
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
...
parent
cb1c5b5d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
367 additions
and
311 deletions
+367
-311
workspace.xml
.idea/workspace.xml
+132
-166
README.md
README.md
+1
-0
model_monitor_PSI_AUC.py
model_monitor_PSI_AUC.py
+234
-145
No files found.
.idea/workspace.xml
View file @
edc7499b
...
...
@@ -2,13 +2,8 @@
<project
version=
"4"
>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"e1b3e57f-dd82-4187-916a-8212c6c521a7"
name=
"Default Changelist"
comment=
""
>
<change
afterPath=
"$PROJECT_DIR$/.gitignore"
afterDir=
"false"
/>
<change
afterPath=
"$PROJECT_DIR$/model_score.xlsx"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/README.md"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/README.md"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/doc/image/31EA97A8-19B7-45c6-8302-4148D19BAABA.png"
beforeDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/doc/image/C6640ABE-9017-42b5-A92A-2DE5601A15D8.png"
beforeDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/model_monitor_PSI_AUC.py"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/model_monitor_PSI_AUC.py"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/test.py"
beforeDir=
"false"
/>
</list>
<option
name=
"EXCLUDED_CONVERTED_TO_IGNORED"
value=
"true"
/>
<option
name=
"SHOW_DIALOG"
value=
"false"
/>
...
...
@@ -18,11 +13,11 @@
</component>
<component
name=
"FileEditorManager"
>
<leaf>
<file
pinned=
"false"
current-in-tab=
"
tru
e"
>
<file
pinned=
"false"
current-in-tab=
"
fals
e"
>
<entry
file=
"file://$PROJECT_DIR$/tmp.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
-136
"
>
<caret
line=
"
495"
column=
"19"
selection-start-line=
"495"
selection-start-column=
"19"
selection-end-line=
"495"
selection-end-column=
"1
9"
/>
<state
relative-caret-position=
"
332
"
>
<caret
line=
"
334"
column=
"39"
selection-start-line=
"334"
selection-start-column=
"39"
selection-end-line=
"334"
selection-end-column=
"3
9"
/>
<folding>
<element
signature=
"e#2742#2953#0"
/>
<element
signature=
"e#2931#3547#0"
/>
...
...
@@ -57,8 +52,6 @@
<element
signature=
"e#11135#11353#0"
/>
<element
signature=
"e#11490#11717#0"
/>
<element
signature=
"e#11819#11905#0"
/>
<element
signature=
"e#12107#12803#0"
/>
<element
signature=
"e#12277#12548#0"
/>
<element
signature=
"e#12877#13384#0"
/>
<element
signature=
"e#13413#13513#0"
/>
<element
signature=
"e#13575#13737#0"
/>
...
...
@@ -100,81 +93,68 @@
</provider>
</entry>
</file>
<file
pinned=
"false"
current-in-tab=
"
fals
e"
>
<file
pinned=
"false"
current-in-tab=
"
tru
e"
>
<entry
file=
"file://$PROJECT_DIR$/model_monitor_PSI_AUC.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
152
"
>
<caret
line=
"
481"
column=
"24"
selection-start-line=
"481"
selection-start-column=
"24"
selection-end-line=
"481"
selection-end-column=
"24
"
/>
<state
relative-caret-position=
"
368
"
>
<caret
line=
"
337"
column=
"39"
selection-start-line=
"337"
selection-start-column=
"39"
selection-end-line=
"337"
selection-end-column=
"39
"
/>
<folding>
<element
signature=
"e#16#34#0"
expanded=
"true"
/>
<element
signature=
"e#509#2431#0"
/>
<element
signature=
"e#2143#2314#0"
/>
<element
signature=
"e#2471#2578#0"
/>
<element
signature=
"e#2634#2768#0"
/>
<element
signature=
"e#2804#2887#0"
/>
<element
signature=
"e#2933#3549#0"
/>
<element
signature=
"e#2933#3035#1"
/>
<element
signature=
"e#3113#3384#0"
/>
<element
signature=
"e#3413#3549#0"
/>
<element
signature=
"e#3497#3549#0"
/>
<element
signature=
"e#3601#4067#0"
/>
<element
signature=
"e#3601#3720#1"
/>
<element
signature=
"e#3746#4027#0"
/>
<element
signature=
"e#4166#6211#0"
/>
<element
signature=
"e#4166#4321#1"
/>
<element
signature=
"e#4815#5478#0"
/>
<element
signature=
"e#4995#5054#0"
/>
<element
signature=
"e#5595#6158#0"
/>
<element
signature=
"e#5626#5702#0"
/>
<element
signature=
"e#5745#6158#0"
/>
<element
signature=
"e#6071#6158#0"
/>
<element
signature=
"e#6310#8760#0"
/>
<element
signature=
"e#6310#6465#1"
/>
<element
signature=
"e#7061#8339#0"
/>
<element
signature=
"e#7251#7310#0"
/>
<element
signature=
"e#7930#8200#0"
/>
<element
signature=
"e#8245#8339#0"
/>
<element
signature=
"e#8420#8704#0"
/>
<element
signature=
"e#8459#8536#0"
/>
<element
signature=
"e#8801#13386#0"
/>
<element
signature=
"e#9246#9397#0"
/>
<element
signature=
"e#9353#9397#0"
/>
<element
signature=
"e#9431#9516#0"
/>
<element
signature=
"e#9597#9609#0"
/>
<element
signature=
"e#10491#10501#0"
/>
<element
signature=
"e#11091#11355#0"
/>
<element
signature=
"e#11137#11355#0"
/>
<element
signature=
"e#11492#11719#0"
/>
<element
signature=
"e#11821#11907#0"
/>
<element
signature=
"e#12109#12805#0"
/>
<element
signature=
"e#12279#12550#0"
/>
<element
signature=
"e#12879#13386#0"
/>
<element
signature=
"e#12969#13077#0"
/>
<element
signature=
"e#13143#13307#0"
/>
<element
signature=
"e#13427#18635#0"
/>
<element
signature=
"e#14091#14242#0"
/>
<element
signature=
"e#14198#14242#0"
/>
<element
signature=
"e#14276#14361#0"
/>
<element
signature=
"e#14442#14454#0"
/>
<element
signature=
"e#15534#15544#0"
/>
<element
signature=
"e#16140#16425#0"
/>
<element
signature=
"e#16186#16425#0"
/>
<element
signature=
"e#16562#16786#0"
/>
<element
signature=
"e#16888#16974#0"
/>
<element
signature=
"e#17178#18133#0"
/>
<element
signature=
"e#17348#17878#0"
/>
<element
signature=
"e#17467#17516#0"
/>
<element
signature=
"e#18207#18635#0"
/>
<element
signature=
"e#18218#18326#0"
/>
<element
signature=
"e#18392#18556#0"
/>
<element
signature=
"e#18742#19104#0"
/>
<element
signature=
"e#19265#19320#0"
/>
<element
signature=
"e#19452#19653#0"
/>
<element
signature=
"e#19470#19652#0"
/>
<element
signature=
"e#19894#19949#0"
/>
<element
signature=
"e#20774#21074#0"
/>
<element
signature=
"e#20811#21047#0"
/>
<element
signature=
"e#21084#21089#0"
/>
<element
signature=
"e#2839#3050#0"
/>
<element
signature=
"e#4164#4780#0"
/>
<element
signature=
"e#4164#4266#1"
/>
<element
signature=
"e#4344#4615#0"
/>
<element
signature=
"e#4644#4780#0"
/>
<element
signature=
"e#4728#4780#0"
/>
<element
signature=
"e#4832#5298#0"
/>
<element
signature=
"e#4832#4951#1"
/>
<element
signature=
"e#4977#5258#0"
/>
<element
signature=
"e#5397#7442#0"
/>
<element
signature=
"e#5397#5552#1"
/>
<element
signature=
"e#6046#6709#0"
/>
<element
signature=
"e#6226#6285#0"
/>
<element
signature=
"e#6826#7389#0"
/>
<element
signature=
"e#6857#6933#0"
/>
<element
signature=
"e#6976#7389#0"
/>
<element
signature=
"e#7302#7389#0"
/>
<element
signature=
"e#7541#9990#0"
/>
<element
signature=
"e#7541#7696#1"
/>
<element
signature=
"e#8291#9569#0"
/>
<element
signature=
"e#8481#8540#0"
/>
<element
signature=
"e#9160#9430#0"
/>
<element
signature=
"e#9475#9569#0"
/>
<element
signature=
"e#9650#9934#0"
/>
<element
signature=
"e#9689#9766#0"
/>
<element
signature=
"e#10467#10618#0"
/>
<element
signature=
"e#10574#10618#0"
/>
<element
signature=
"e#10925#10937#0"
/>
<element
signature=
"e#11819#11829#0"
/>
<element
signature=
"e#12419#12683#0"
/>
<element
signature=
"e#12465#12683#0"
/>
<element
signature=
"e#12789#13016#0"
/>
<element
signature=
"e#13118#13204#0"
/>
<element
signature=
"e#14176#14668#0"
/>
<element
signature=
"e#14266#14362#0"
/>
<element
signature=
"e#14428#14589#0"
/>
<element
signature=
"e#15686#15837#0"
/>
<element
signature=
"e#15793#15837#0"
/>
<element
signature=
"e#16144#16156#0"
/>
<element
signature=
"e#17236#17246#0"
/>
<element
signature=
"e#17842#18127#0"
/>
<element
signature=
"e#17888#18127#0"
/>
<element
signature=
"e#18233#18457#0"
/>
<element
signature=
"e#18559#18645#0"
/>
<element
signature=
"e#18849#19804#0"
/>
<element
signature=
"e#19019#19549#0"
/>
<element
signature=
"e#19138#19187#0"
/>
<element
signature=
"e#19889#19985#0"
/>
<element
signature=
"e#20051#20305#0"
/>
<element
signature=
"e#22953#23042#0"
/>
<element
signature=
"e#23127#23384#0"
/>
<element
signature=
"e#23189#23322#0"
/>
<element
signature=
"e#23487#23700#0"
/>
<element
signature=
"e#23866#24109#0"
/>
<element
signature=
"e#23903#24104#0"
/>
<element
signature=
"e#24119#24124#0"
/>
<element
signature=
"e#21487#21515#0"
/>
</folding>
</state>
...
...
@@ -197,9 +177,9 @@
<option
name=
"CHANGED_PATHS"
>
<list>
<option
value=
"$PROJECT_DIR$/.gitignore"
/>
<option
value=
"$PROJECT_DIR$/model_monitor_PSI_AUC.py"
/>
<option
value=
"$PROJECT_DIR$/test.py"
/>
<option
value=
"$PROJECT_DIR$/tmp.py"
/>
<option
value=
"$PROJECT_DIR$/model_monitor_PSI_AUC.py"
/>
</list>
</option>
</component>
...
...
@@ -239,6 +219,7 @@
</panes>
</component>
<component
name=
"PropertiesComponent"
>
<property
name=
"SHARE_PROJECT_CONFIGURATION_FILES"
value=
"true"
/>
<property
name=
"last_opened_file_path"
value=
"$PROJECT_DIR$"
/>
</component>
<component
name=
"RunDashboard"
>
...
...
@@ -332,85 +313,6 @@
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/model_monitor_PSI_AUC.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"152"
>
<caret
line=
"481"
column=
"24"
selection-start-line=
"481"
selection-start-column=
"24"
selection-end-line=
"481"
selection-end-column=
"24"
/>
<folding>
<element
signature=
"e#16#34#0"
expanded=
"true"
/>
<element
signature=
"e#509#2431#0"
/>
<element
signature=
"e#2143#2314#0"
/>
<element
signature=
"e#2471#2578#0"
/>
<element
signature=
"e#2634#2768#0"
/>
<element
signature=
"e#2804#2887#0"
/>
<element
signature=
"e#2933#3549#0"
/>
<element
signature=
"e#2933#3035#1"
/>
<element
signature=
"e#3113#3384#0"
/>
<element
signature=
"e#3413#3549#0"
/>
<element
signature=
"e#3497#3549#0"
/>
<element
signature=
"e#3601#4067#0"
/>
<element
signature=
"e#3601#3720#1"
/>
<element
signature=
"e#3746#4027#0"
/>
<element
signature=
"e#4166#6211#0"
/>
<element
signature=
"e#4166#4321#1"
/>
<element
signature=
"e#4815#5478#0"
/>
<element
signature=
"e#4995#5054#0"
/>
<element
signature=
"e#5595#6158#0"
/>
<element
signature=
"e#5626#5702#0"
/>
<element
signature=
"e#5745#6158#0"
/>
<element
signature=
"e#6071#6158#0"
/>
<element
signature=
"e#6310#8760#0"
/>
<element
signature=
"e#6310#6465#1"
/>
<element
signature=
"e#7061#8339#0"
/>
<element
signature=
"e#7251#7310#0"
/>
<element
signature=
"e#7930#8200#0"
/>
<element
signature=
"e#8245#8339#0"
/>
<element
signature=
"e#8420#8704#0"
/>
<element
signature=
"e#8459#8536#0"
/>
<element
signature=
"e#8801#13386#0"
/>
<element
signature=
"e#9246#9397#0"
/>
<element
signature=
"e#9353#9397#0"
/>
<element
signature=
"e#9431#9516#0"
/>
<element
signature=
"e#9597#9609#0"
/>
<element
signature=
"e#10491#10501#0"
/>
<element
signature=
"e#11091#11355#0"
/>
<element
signature=
"e#11137#11355#0"
/>
<element
signature=
"e#11492#11719#0"
/>
<element
signature=
"e#11821#11907#0"
/>
<element
signature=
"e#12109#12805#0"
/>
<element
signature=
"e#12279#12550#0"
/>
<element
signature=
"e#12879#13386#0"
/>
<element
signature=
"e#12969#13077#0"
/>
<element
signature=
"e#13143#13307#0"
/>
<element
signature=
"e#13427#18635#0"
/>
<element
signature=
"e#14091#14242#0"
/>
<element
signature=
"e#14198#14242#0"
/>
<element
signature=
"e#14276#14361#0"
/>
<element
signature=
"e#14442#14454#0"
/>
<element
signature=
"e#15534#15544#0"
/>
<element
signature=
"e#16140#16425#0"
/>
<element
signature=
"e#16186#16425#0"
/>
<element
signature=
"e#16562#16786#0"
/>
<element
signature=
"e#16888#16974#0"
/>
<element
signature=
"e#17178#18133#0"
/>
<element
signature=
"e#17348#17878#0"
/>
<element
signature=
"e#17467#17516#0"
/>
<element
signature=
"e#18207#18635#0"
/>
<element
signature=
"e#18218#18326#0"
/>
<element
signature=
"e#18392#18556#0"
/>
<element
signature=
"e#18742#19104#0"
/>
<element
signature=
"e#19265#19320#0"
/>
<element
signature=
"e#19452#19653#0"
/>
<element
signature=
"e#19470#19652#0"
/>
<element
signature=
"e#19894#19949#0"
/>
<element
signature=
"e#20774#21074#0"
/>
<element
signature=
"e#20811#21047#0"
/>
<element
signature=
"e#21084#21089#0"
/>
<element
signature=
"e#21487#21515#0"
/>
</folding>
</state>
</provider>
</entry>
<entry
file=
"file://$USER_HOME$/.PyCharmCE2019.1/system/python_stubs/1626812534/builtins.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"226"
>
...
...
@@ -430,8 +332,8 @@
</entry>
<entry
file=
"file://$PROJECT_DIR$/tmp.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
-136
"
>
<caret
line=
"
495"
column=
"19"
selection-start-line=
"495"
selection-start-column=
"19"
selection-end-line=
"495"
selection-end-column=
"1
9"
/>
<state
relative-caret-position=
"
332
"
>
<caret
line=
"
334"
column=
"39"
selection-start-line=
"334"
selection-start-column=
"39"
selection-end-line=
"334"
selection-end-column=
"3
9"
/>
<folding>
<element
signature=
"e#2742#2953#0"
/>
<element
signature=
"e#2931#3547#0"
/>
...
...
@@ -466,8 +368,6 @@
<element
signature=
"e#11135#11353#0"
/>
<element
signature=
"e#11490#11717#0"
/>
<element
signature=
"e#11819#11905#0"
/>
<element
signature=
"e#12107#12803#0"
/>
<element
signature=
"e#12277#12548#0"
/>
<element
signature=
"e#12877#13384#0"
/>
<element
signature=
"e#13413#13513#0"
/>
<element
signature=
"e#13575#13737#0"
/>
...
...
@@ -496,5 +396,71 @@
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/model_monitor_PSI_AUC.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"368"
>
<caret
line=
"337"
column=
"39"
selection-start-line=
"337"
selection-start-column=
"39"
selection-end-line=
"337"
selection-end-column=
"39"
/>
<folding>
<element
signature=
"e#2839#3050#0"
/>
<element
signature=
"e#4164#4780#0"
/>
<element
signature=
"e#4164#4266#1"
/>
<element
signature=
"e#4344#4615#0"
/>
<element
signature=
"e#4644#4780#0"
/>
<element
signature=
"e#4728#4780#0"
/>
<element
signature=
"e#4832#5298#0"
/>
<element
signature=
"e#4832#4951#1"
/>
<element
signature=
"e#4977#5258#0"
/>
<element
signature=
"e#5397#7442#0"
/>
<element
signature=
"e#5397#5552#1"
/>
<element
signature=
"e#6046#6709#0"
/>
<element
signature=
"e#6226#6285#0"
/>
<element
signature=
"e#6826#7389#0"
/>
<element
signature=
"e#6857#6933#0"
/>
<element
signature=
"e#6976#7389#0"
/>
<element
signature=
"e#7302#7389#0"
/>
<element
signature=
"e#7541#9990#0"
/>
<element
signature=
"e#7541#7696#1"
/>
<element
signature=
"e#8291#9569#0"
/>
<element
signature=
"e#8481#8540#0"
/>
<element
signature=
"e#9160#9430#0"
/>
<element
signature=
"e#9475#9569#0"
/>
<element
signature=
"e#9650#9934#0"
/>
<element
signature=
"e#9689#9766#0"
/>
<element
signature=
"e#10467#10618#0"
/>
<element
signature=
"e#10574#10618#0"
/>
<element
signature=
"e#10925#10937#0"
/>
<element
signature=
"e#11819#11829#0"
/>
<element
signature=
"e#12419#12683#0"
/>
<element
signature=
"e#12465#12683#0"
/>
<element
signature=
"e#12789#13016#0"
/>
<element
signature=
"e#13118#13204#0"
/>
<element
signature=
"e#14176#14668#0"
/>
<element
signature=
"e#14266#14362#0"
/>
<element
signature=
"e#14428#14589#0"
/>
<element
signature=
"e#15686#15837#0"
/>
<element
signature=
"e#15793#15837#0"
/>
<element
signature=
"e#16144#16156#0"
/>
<element
signature=
"e#17236#17246#0"
/>
<element
signature=
"e#17842#18127#0"
/>
<element
signature=
"e#17888#18127#0"
/>
<element
signature=
"e#18233#18457#0"
/>
<element
signature=
"e#18559#18645#0"
/>
<element
signature=
"e#18849#19804#0"
/>
<element
signature=
"e#19019#19549#0"
/>
<element
signature=
"e#19138#19187#0"
/>
<element
signature=
"e#19889#19985#0"
/>
<element
signature=
"e#20051#20305#0"
/>
<element
signature=
"e#22953#23042#0"
/>
<element
signature=
"e#23127#23384#0"
/>
<element
signature=
"e#23189#23322#0"
/>
<element
signature=
"e#23487#23700#0"
/>
<element
signature=
"e#23866#24109#0"
/>
<element
signature=
"e#23903#24104#0"
/>
<element
signature=
"e#24119#24124#0"
/>
<element
signature=
"e#21487#21515#0"
/>
</folding>
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
README.md
View file @
edc7499b
...
...
@@ -97,6 +97,7 @@
-
min_aucr: 最小AUCR, 小于则视为该客群异常.
-
执行run函数.
```
python
...
...
model_monitor_PSI_AUC.py
View file @
edc7499b
...
...
@@ -13,15 +13,17 @@ import pymongo
import
os
import
pickle
import
warnings
warnings
.
filterwarnings
(
'ignore'
)
import
datetime
from
dateutil.relativedelta
import
relativedelta
from
collections
import
OrderedDict
warnings
.
filterwarnings
(
'ignore'
)
class
ModelMonitor
:
def
__init__
(
self
,
excel_path
=
'./model_score.xlsx'
,
sheet_name
=
'mongo_model'
,
passdue_day
=
15
,
save_path
=
'./image/'
,
last_month
=
7
,
num_month
=
4
,
min_user_group
=
500
):
num_month
=
4
,
min_user_group
=
500
,
max_psi
=
0.1
,
min_aucr
=
0.85
):
# 考虑到数据库配置基本不变, 所以不设置创建对象时对应输入变量.
self
.
mysql_engine
=
pymysql
.
connect
(
host
=
'172.20.6.9'
,
...
...
@@ -45,9 +47,19 @@ class ModelMonitor:
# 一些定义的常量
self
.
passdue_day
=
passdue_day
# 逾期天数, 默认15.
self
.
save_path
=
save_path
# 图片保存位置, 默认./image.
self
.
last_month
=
last_month
# 取数的最后一个月.
self
.
num_month
=
num_month
# 取数的月数.
self
.
min_user_group
=
min_user_group
# 最小客群数量.
self
.
max_psi
=
max_psi
# 最大PSI, 超过视为异常.
self
.
min_aucr
=
min_aucr
# 最小AUC比率, 小于视为异常.
# 获取当天日期信息.
self
.
current_date
=
(
datetime
.
date
.
today
()
+
relativedelta
(
days
=-
1
))
.
strftime
(
'
%
Y-
%
m-
%
d'
)
self
.
response_date
=
(
datetime
.
date
.
today
()
+
relativedelta
(
days
=-
(
31
+
self
.
passdue_day
)))
.
strftime
(
'
%
Y-
%
m-
%
d'
)
self
.
first_date
=
(
datetime
.
date
.
today
()
+
relativedelta
(
months
=-
self
.
num_month
+
1
))
.
strftime
(
'
%
Y-
%
m-01'
)
self
.
current_month
=
(
datetime
.
date
.
today
()
+
datetime
.
timedelta
(
days
=-
1
))
.
month
self
.
response_month
=
(
datetime
.
date
.
today
()
+
relativedelta
(
days
=-
46
))
.
month
self
.
first_month
=
self
.
current_month
-
self
.
num_month
+
1
# 将会从数据库中读取的数据.
self
.
mysql_df
=
None
...
...
@@ -55,30 +67,51 @@ class ModelMonitor:
self
.
merge_data
=
None
# 统计数据记录.
psi_cols
=
[
'model_name'
,
'group_name'
]
auc_cols
=
[
'model_name'
,
'group_name'
]
for
m
in
range
(
self
.
last_month
-
self
.
num_month
,
self
.
last_month
):
psi_cols
.
append
(
str
(
m
)
+
'm_num'
)
psi_cols
.
append
(
str
(
m
)
+
'm_psi'
)
auc_cols
.
append
(
str
(
m
)
+
'm_num'
)
auc_cols
.
append
(
str
(
m
)
+
'm_auc'
)
psi_cols
=
[
'模型名称'
,
'客群名称'
]
auc_cols
=
[
'模型名称'
,
'客群名称'
]
for
m
in
range
(
self
.
first_month
,
self
.
current_month
+
1
):
psi_cols
.
append
(
str
(
m
)
+
'月数量'
)
psi_cols
.
append
(
str
(
m
)
+
'月PSI'
)
auc_cols
.
append
(
str
(
m
)
+
'月数量'
)
auc_cols
.
append
(
str
(
m
)
+
'月AUC'
)
auc_cols
.
append
(
str
(
m
)
+
'月AUCR'
)
self
.
psi_info_df
=
pd
.
DataFrame
(
columns
=
psi_cols
)
self
.
auc_info_df
=
pd
.
DataFrame
(
columns
=
auc_cols
)
self
.
na_enough_data_psi_set
=
set
()
# 一些新的模型没有足够数据用于统计.
self
.
na_enough_data_auc_set
=
set
()
# 一些新的模型没有足够数据用于统计.
def
sql_query
(
self
,
sql
):
'''
连接MySQL数据库, 根据SQL返回数据.
:param sql: str.
:return: DataFrame.
'''
try
:
return
pd
.
read_sql
(
sql
,
self
.
mysql_engine
)
except
:
print
(
'SQL查询出现错误.'
)
def
mongo_query
(
self
,
condition
,
fields
):
'''
连接MongoDB, 根据查询返回数据.
:param condition: dict
:param fields: dict
:return: DataFrame
'''
try
:
return
pd
.
DataFrame
(
list
(
self
.
mongo_table
.
find
(
condition
,
fields
)))
except
:
print
(
'Mongo查询出现错误.'
)
def
int2str
(
self
,
x
):
'''
将int转换为str, 用于日期.
e.g. 5 --> 05
:param x: int
:return: str.
'''
if
x
>=
10
:
return
str
(
x
)
else
:
...
...
@@ -123,98 +156,101 @@ class ModelMonitor:
return
None
def
helper_psi
(
self
,
user_group_name
=
None
,
df
=
None
,
info_dict
=
None
,
field
=
None
):
'''
信息提取函数.
:param user_group_name: str, 客群名称.
:param df: Dataframe, 对应客群数据.
:return: None.
'''
print
(
'正在处理
%
s客群数据.'
%
user_group_name
)
info_dict
[
user_group_name
]
=
OrderedDict
()
month_list
=
list
(
sorted
(
df
[
'month_label'
]
.
unique
()
.
tolist
()))
if
0
in
month_list
:
month_list
.
remove
(
0
)
df_g
=
df
.
groupby
([
'month_label'
,
'bins'
])
.
agg
({
field
:
[
'count'
]})
df_g
=
df_g
.
reset_index
()
df_g
=
df_g
.
sort_values
([
'month_label'
,
'bins'
])
for
i
,
m
in
enumerate
(
month_list
):
amt_in_bins
=
df_g
.
loc
[
df_g
[
'month_label'
]
==
m
,
field
]
.
values
# 某月样本量小于阈值, 放弃记录信息.
if
amt_in_bins
.
sum
()
<
self
.
min_user_group
:
print
(
'
%
d月样本量过小, 放弃提取信息.'
%
m
)
continue
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
]
=
{}
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'该月样本量'
]
=
amt_in_bins
.
sum
()
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱样本量'
]
=
amt_in_bins
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱样本占比'
]
=
amt_in_bins
/
amt_in_bins
.
sum
()
print
(
'
%
d月样本量:
%
d'
%
(
m
,
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'该月样本量'
]))
# 计算PSI, 以样本量达标的第一个月为基准.
for
i
,
m
in
enumerate
(
info_dict
[
user_group_name
]):
if
i
==
0
:
info_dict
[
user_group_name
][
m
][
'psi'
]
=
0
bench_month
=
m
'''
信息提取函数.
:param user_group_name: str, 客群名称.
:param df: Dataframe, 对应客群数据.
:return: None.
'''
print
(
'正在处理
%
s客群数据.'
%
user_group_name
)
info_dict
[
user_group_name
]
=
OrderedDict
()
month_list
=
list
(
sorted
(
df
[
'month_label'
]
.
unique
()
.
tolist
()))
if
0
in
month_list
:
month_list
.
remove
(
0
)
df_g
=
df
.
groupby
([
'month_label'
,
'bins'
])
.
agg
({
field
:
[
'count'
]})
df_g
=
df_g
.
reset_index
()
df_g
=
df_g
.
sort_values
([
'month_label'
,
'bins'
])
for
i
,
m
in
enumerate
(
month_list
):
amt_in_bins
=
df_g
.
loc
[
df_g
[
'month_label'
]
==
m
,
field
]
.
values
# 某月样本量小于阈值, 放弃记录信息.
if
amt_in_bins
.
sum
()
<
self
.
min_user_group
:
print
(
'
%
d月样本量过小, 放弃提取信息.'
%
m
)
continue
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
]
=
{}
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'该月样本量'
]
=
amt_in_bins
.
sum
()
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱样本量'
]
=
amt_in_bins
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱样本占比'
]
=
amt_in_bins
/
amt_in_bins
.
sum
()
print
(
'
%
d月样本量:
%
d'
%
(
m
,
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'该月样本量'
]))
# 计算PSI, 以样本量达标的第一个月为基准.
for
i
,
m
in
enumerate
(
info_dict
[
user_group_name
]):
if
i
==
0
:
info_dict
[
user_group_name
][
m
][
'psi'
]
=
0
bench_month
=
m
else
:
psi
=
self
.
calc_psi
(
df_g
.
loc
[
df_g
[
'month_label'
]
==
int
(
bench_month
[
0
]),
field
],
df_g
.
loc
[
df_g
[
'month_label'
]
==
int
(
m
[
0
]),
field
])
if
psi
:
info_dict
[
user_group_name
][
m
][
'psi'
]
=
psi
else
:
psi
=
self
.
calc_psi
(
df_g
.
loc
[
df_g
[
'month_label'
]
==
int
(
bench_month
[
0
]),
field
],
df_g
.
loc
[
df_g
[
'month_label'
]
==
int
(
m
[
0
]),
field
])
if
psi
:
info_dict
[
user_group_name
][
m
][
'psi'
]
=
psi
else
:
info_dict
[
user_group_name
][
m
][
'psi'
]
=
-
999
print
(
'计算PSI出现错误.'
)
print
(
'处理完成.'
)
print
(
'='
*
40
)
info_dict
[
user_group_name
][
m
][
'psi'
]
=
-
999
print
(
'计算PSI出现错误.'
)
print
(
'处理完成.'
)
print
(
'='
*
40
)
def
helper_auc
(
self
,
user_group_name
=
None
,
df
=
None
,
info_dict
=
None
,
field
=
None
):
'''
信息提取函数.
:param user_group_name: str, 客群名称.
:param df: Dataframe, 对应客群数据.
:return: None.
'''
print
(
'正在处理
%
s客群数据.'
%
user_group_name
)
info_dict
[
user_group_name
]
=
OrderedDict
()
month_list
=
list
(
sorted
(
df
[
'month_label'
]
.
unique
()
.
tolist
()))
if
0
in
month_list
:
month_list
.
remove
(
0
)
df_g
=
df
.
groupby
([
'month_label'
,
'bins'
])[
'overdue'
]
.
agg
({
'overdue'
:
[
'count'
,
'sum'
,
'mean'
]})
df_g
.
columns
=
[
'_'
.
join
(
x
)
for
x
in
df_g
.
columns
.
ravel
()]
df_g
=
df_g
.
reset_index
()
df_g
=
df_g
.
sort_values
([
'month_label'
,
'bins'
])
for
i
,
m
in
enumerate
(
month_list
):
amt_in_bins
=
df_g
.
loc
[
df_g
[
'month_label'
]
==
m
,
'overdue_count'
]
.
values
# 某月样本量小于阈值, 放弃记录信息.
if
amt_in_bins
.
sum
()
<
self
.
min_user_group
:
print
(
'
%
d月样本量过小, 放弃提取信息.'
%
m
)
continue
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
]
=
{}
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'该月样本量'
]
=
amt_in_bins
.
sum
()
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱样本量'
]
=
amt_in_bins
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱逾期样本量'
]
=
df_g
.
loc
[
df_g
[
'month_label'
]
==
m
,
'overdue_sum'
]
.
values
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱逾期率'
]
=
df_g
.
loc
[
df_g
[
'month_label'
]
==
m
,
'overdue_mean'
]
.
values
print
(
'
%
d月样本量:
%
d'
%
(
m
,
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'该月样本量'
]))
try
:
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'auc'
]
=
roc_auc_score
(
df
.
loc
[(
df
[
'month_label'
]
==
m
)
&
(
df
[
field
]
.
notna
()),
'overdue'
],
df
.
loc
[(
df
[
'month_label'
]
==
m
)
&
(
df
[
field
]
.
notna
()),
field
])
except
:
print
(
'AUC计算发生错误.'
)
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'auc'
]
=
-
999
for
i
,
m
in
enumerate
(
info_dict
[
user_group_name
]):
if
i
==
0
:
# 基准月.
info_dict
[
user_group_name
][
m
][
'aucR'
]
=
1
bench_month
=
m
else
:
info_dict
[
user_group_name
][
m
][
'aucR'
]
=
info_dict
[
user_group_name
][
m
][
'auc'
]
/
info_dict
[
user_group_name
][
bench_month
][
'auc'
]
'''
信息提取函数.
:param user_group_name: str, 客群名称.
:param df: Dataframe, 对应客群数据.
:return: None.
'''
print
(
'正在处理
%
s客群数据.'
%
user_group_name
)
info_dict
[
user_group_name
]
=
OrderedDict
()
month_list
=
list
(
sorted
(
df
[
'month_label'
]
.
unique
()
.
tolist
()))
if
0
in
month_list
:
month_list
.
remove
(
0
)
df_g
=
df
.
groupby
([
'month_label'
,
'bins'
])[
'overdue'
]
.
agg
({
'overdue'
:
[
'count'
,
'sum'
,
'mean'
]})
df_g
.
columns
=
[
'_'
.
join
(
x
)
for
x
in
df_g
.
columns
.
ravel
()]
df_g
=
df_g
.
reset_index
()
df_g
=
df_g
.
sort_values
([
'month_label'
,
'bins'
])
for
i
,
m
in
enumerate
(
month_list
):
amt_in_bins
=
df_g
.
loc
[
df_g
[
'month_label'
]
==
m
,
'overdue_count'
]
.
values
# 某月样本量小于阈值, 放弃记录信息.
if
amt_in_bins
.
sum
()
<
self
.
min_user_group
:
print
(
'
%
d月样本量过小, 放弃提取信息.'
%
m
)
continue
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
]
=
{}
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'该月样本量'
]
=
amt_in_bins
.
sum
()
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱样本量'
]
=
amt_in_bins
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱逾期样本量'
]
=
df_g
.
loc
[
df_g
[
'month_label'
]
==
m
,
'overdue_sum'
]
.
values
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'各分箱逾期率'
]
=
df_g
.
loc
[
df_g
[
'month_label'
]
==
m
,
'overdue_mean'
]
.
values
print
(
'
%
d月样本量:
%
d'
%
(
m
,
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'该月样本量'
]))
try
:
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'auc'
]
=
roc_auc_score
(
df
.
loc
[(
df
[
'month_label'
]
==
m
)
&
(
df
[
field
]
.
notna
()),
'overdue'
],
df
.
loc
[(
df
[
'month_label'
]
==
m
)
&
(
df
[
field
]
.
notna
()),
field
])
except
:
print
(
'AUC计算发生错误.'
)
info_dict
[
user_group_name
][
str
(
m
)
+
'月'
][
'auc'
]
=
-
999
print
(
'处理完成.'
)
print
(
'='
*
40
)
for
i
,
m
in
enumerate
(
info_dict
[
user_group_name
]):
if
i
==
0
:
# 基准月.
info_dict
[
user_group_name
][
m
][
'aucR'
]
=
1
bench_month
=
m
else
:
info_dict
[
user_group_name
][
m
][
'aucR'
]
=
info_dict
[
user_group_name
][
m
][
'auc'
]
/
\
info_dict
[
user_group_name
][
bench_month
][
'auc'
]
print
(
'处理完成.'
)
print
(
'='
*
40
)
def
plot_psi
(
self
,
field
):
# 创建文件夹保存图片.
...
...
@@ -226,14 +262,15 @@ class ModelMonitor:
df_copy
=
self
.
merge_data
[[
field
,
'month_label'
,
'applied_type'
,
'applied_channel'
]]
.
copy
()
# 对模型分进行分箱, 选取数据中该模型分最开始的那个月作为基准.
for
m
in
range
(
self
.
last_month
-
self
.
num_month
,
self
.
last_month
):
for
m
in
range
(
self
.
first_month
,
self
.
current_month
+
1
):
bins
=
self
.
make_bin
(
df_copy
.
loc
[
df_copy
[
'month_label'
]
==
m
,
field
])
if
bins
:
print
(
'以
%
d月为基准月.'
%
m
)
break
if
not
bins
:
print
(
'
%
s 数据时间跨度不足, 放弃画图.'
%
field
)
print
(
'='
*
40
)
self
.
na_enough_data_psi_set
.
add
(
self
.
model_feild_name_dict
[
field
])
print
(
'
%
s 数据时间跨度不足, 放弃画图.'
%
self
.
model_feild_name_dict
[
field
])
print
(
'='
*
40
)
return
None
df_copy
[
'bins'
]
=
pd
.
cut
(
df_copy
[
field
],
bins
)
# 根据分箱规则进行分箱.
...
...
@@ -252,7 +289,6 @@ class ModelMonitor:
# '各分箱样本占比': [...]}}}
info_dict
=
{}
# 全样本
self
.
helper_psi
(
'全样本'
,
df_copy
,
info_dict
,
field
)
# 按申请类型划分.
...
...
@@ -273,13 +309,15 @@ class ModelMonitor:
app_chan_set
=
df_copy_g
[
'applied_channel'
]
.
unique
()
for
app_type
in
app_type_set
:
for
app_chan
in
app_chan_set
:
if
df_copy_g
.
loc
[(
df_copy_g
[
'applied_type'
]
==
app_type
)
&
(
df_copy_g
[
'applied_channel'
]
==
app_chan
)]
.
shape
[
0
]
!=
0
:
if
df_copy_g
.
loc
[
(
df_copy_g
[
'applied_type'
]
==
app_type
)
&
(
df_copy_g
[
'applied_channel'
]
==
app_chan
)]
.
shape
[
0
]
!=
0
:
user_group_dict
[
app_type_dict
[
app_type
]
+
'-'
+
app_chan
]
=
(
app_type
,
app_chan
)
del
df_copy_g
## 按划分的客群处理数据.
print
(
user_group_dict
)
for
user_group_name
in
user_group_dict
:
self
.
helper_psi
(
user_group_name
,
df_copy
.
loc
[(
df_copy
[
'applied_type'
]
==
user_group_dict
[
user_group_name
][
0
])
&
(
df_copy
[
'applied_channel'
]
==
user_group_dict
[
user_group_name
][
1
])],
self
.
helper_psi
(
user_group_name
,
df_copy
.
loc
[
(
df_copy
[
'applied_type'
]
==
user_group_dict
[
user_group_name
][
0
])
&
(
df_copy
[
'applied_channel'
]
==
user_group_dict
[
user_group_name
][
1
])],
info_dict
,
field
)
# 过滤不包含信息的客群.
remove_list
=
[]
...
...
@@ -291,14 +329,15 @@ class ModelMonitor:
# 画图.
print
(
'开始画图.'
)
print
(
'='
*
40
)
print
(
'='
*
40
)
for
user_group_name
in
info_dict
:
print
(
self
.
model_feild_name_dict
[
field
]
+
'-'
+
user_group_name
)
plt
.
figure
(
figsize
=
(
16
,
8
))
for
m
in
info_dict
[
user_group_name
]:
plt
.
plot
(
range
(
len
(
info_dict
[
user_group_name
][
m
][
'各分箱样本占比'
])),
info_dict
[
user_group_name
][
m
][
'各分箱样本占比'
],
label
=
'
%
s PSI:
%.3
f
\n
样本量:
%
d'
%
(
m
,
info_dict
[
user_group_name
][
m
][
'psi'
],
info_dict
[
user_group_name
][
m
][
'该月样本量'
]))
[
round
(
x
[
0
],
3
)
for
x
in
info_dict
[
user_group_name
][
m
][
'各分箱样本占比'
]],
label
=
'
%
s PSI:
%.3
f
\n
样本量:
%
d'
%
(
m
,
info_dict
[
user_group_name
][
m
][
'psi'
],
info_dict
[
user_group_name
][
m
][
'该月样本量'
]))
plt
.
legend
(
loc
=
'upper right'
)
plt
.
title
(
self
.
model_feild_name_dict
[
field
]
+
'-'
+
user_group_name
)
plt
.
savefig
(
self
.
save_path
+
'PSI/'
+
self
.
model_feild_name_dict
[
field
]
+
'-'
+
user_group_name
)
...
...
@@ -307,11 +346,11 @@ class ModelMonitor:
# 保存统计信息.
for
user_group_name
in
info_dict
:
# print(self.model_feild_name_dict[field] + '-' + user_group_name)
tmp_dict
=
{
'
model_name
'
:
[
self
.
model_feild_name_dict
[
field
]],
'
group_name
'
:
[
user_group_name
]}
tmp_dict
=
{
'
模型名称
'
:
[
self
.
model_feild_name_dict
[
field
]],
'
客群名称
'
:
[
user_group_name
]}
for
m
in
info_dict
[
user_group_name
]:
tmp_dict
[
m
[
0
]
+
'
m_num
'
]
=
[
int
(
info_dict
[
user_group_name
][
m
][
'该月样本量'
])]
tmp_dict
[
m
[
0
]
+
'
m_psi
'
]
=
[
round
(
info_dict
[
user_group_name
][
m
][
'psi'
],
3
)]
tmp_dict
[
m
[
0
]
+
'
月数量
'
]
=
[
int
(
info_dict
[
user_group_name
][
m
][
'该月样本量'
])]
tmp_dict
[
m
[
0
]
+
'
月PSI
'
]
=
[
round
(
info_dict
[
user_group_name
][
m
][
'psi'
],
3
)]
self
.
psi_info_df
=
self
.
psi_info_df
.
append
(
pd
.
DataFrame
(
tmp_dict
))
def
plot_auc
(
self
,
field
):
...
...
@@ -321,20 +360,28 @@ class ModelMonitor:
if
not
os
.
path
.
exists
(
self
.
save_path
+
'AUC/'
):
os
.
mkdir
(
self
.
save_path
+
'AUC/'
)
# 分离数据.
df_copy
=
self
.
merge_data
[[
field
,
'month_label'
,
'applied_type'
,
'applied_channel'
,
'overdue'
,
'passdue_day'
]]
.
copy
()
df_copy
=
self
.
merge_data
[
[
field
,
'month_label'
,
'applied_type'
,
'applied_channel'
,
'overdue'
,
'passdue_day'
,
'applied_at'
]]
.
copy
()
## 筛选出放款, 且逾期表现的数据.
df_copy
=
df_copy
.
loc
[(
df_copy
[
field
]
.
notna
())
&
(
df_copy
[
'month_label'
]
!=
self
.
last_month
-
1
)
&
(
df_copy
[
field
]
>
0
)
&
(
df_copy
[
'passdue_day'
]
.
notna
())]
if
repr
(
df_copy
[
'applied_at'
]
.
dtype
)
==
"dtype('O')"
:
df_copy
=
df_copy
.
loc
[
(
df_copy
[
field
]
.
notna
())
&
(
df_copy
[
'applied_at'
]
.
apply
(
lambda
x
:
x
[:
10
])
<=
self
.
response_date
)
&
(
df_copy
[
field
]
>
0
)
&
(
df_copy
[
'passdue_day'
]
.
notna
())]
else
:
df_copy
=
df_copy
.
loc
[(
df_copy
[
field
]
.
notna
())
&
(
df_copy
[
'applied_at'
]
.
apply
(
lambda
x
:
x
.
strftime
(
'
%
Y-
%
m-
%
d'
))
<=
self
.
response_date
)
&
(
df_copy
[
field
]
>
0
)
&
(
df_copy
[
'passdue_day'
]
.
notna
())]
# 对模型分进行分箱, 选取数据中该模型分最开始的那个月作为基准.
for
m
in
range
(
self
.
last_month
-
self
.
num_month
,
self
.
last_month
):
for
m
in
range
(
self
.
first_month
,
self
.
response_month
+
1
):
bins
=
self
.
make_bin
(
df_copy
.
loc
[
df_copy
[
'month_label'
]
==
m
,
field
])
if
bins
:
print
(
'以
%
d月为基准月.'
%
m
)
break
if
not
bins
:
print
(
'
%
s 数据时间跨度不足, 放弃画图.'
%
field
)
print
(
'='
*
40
)
self
.
na_enough_data_auc_set
.
add
(
self
.
model_feild_name_dict
[
field
])
print
(
'
%
s 数据时间跨度不足, 放弃画图.'
%
self
.
model_feild_name_dict
[
field
])
print
(
'='
*
40
)
return
None
df_copy
[
'bins'
]
=
pd
.
cut
(
df_copy
[
field
],
bins
)
# 根据分箱规则进行分箱.
...
...
@@ -357,7 +404,6 @@ class ModelMonitor:
# 'aucR': float}}}
info_dict
=
{}
# 全样本
self
.
helper_auc
(
'全样本'
,
df_copy
,
info_dict
,
field
)
# 按申请类型划分.
...
...
@@ -383,10 +429,11 @@ class ModelMonitor:
user_group_dict
[
app_type_dict
[
app_type
]
+
'-'
+
app_chan
]
=
(
app_type
,
app_chan
)
del
df_copy_g
## 按划分的客群处理数据.
print
(
user_group_dict
)
for
user_group_name
in
user_group_dict
:
self
.
helper_auc
(
user_group_name
,
df_copy
.
loc
[(
df_copy
[
'applied_type'
]
==
user_group_dict
[
user_group_name
][
0
])
&
(
df_copy
[
'applied_channel'
]
==
user_group_dict
[
user_group_name
][
1
])],
info_dict
,
field
)
self
.
helper_auc
(
user_group_name
,
df_copy
.
loc
[(
df_copy
[
'applied_type'
]
==
user_group_dict
[
user_group_name
][
0
])
&
(
df_copy
[
'applied_channel'
]
==
user_group_dict
[
user_group_name
][
1
])],
info_dict
,
field
)
# 过滤不包含信息的客群.
remove_list
=
[]
for
user_group_name
in
info_dict
:
...
...
@@ -409,7 +456,8 @@ class ModelMonitor:
plt
.
plot
(
range
(
len
(
info_dict
[
user_group_name
][
m
][
'各分箱逾期率'
])),
info_dict
[
user_group_name
][
m
][
'各分箱逾期率'
],
label
=
'
%
s AUC:
%.3
f AUCR:
%.3
f
\n
样本量:
%
d'
%
(
m
,
info_dict
[
user_group_name
][
m
][
'auc'
],
info_dict
[
user_group_name
][
m
][
'aucR'
],
info_dict
[
user_group_name
][
m
][
'该月样本量'
]))
m
,
info_dict
[
user_group_name
][
m
][
'auc'
],
info_dict
[
user_group_name
][
m
][
'aucR'
],
info_dict
[
user_group_name
][
m
][
'该月样本量'
]))
plt
.
legend
(
loc
=
'upper right'
)
plt
.
title
(
self
.
model_feild_name_dict
[
field
]
+
'-'
+
user_group_name
)
plt
.
savefig
(
self
.
save_path
+
'AUC/'
+
self
.
model_feild_name_dict
[
field
]
+
'-'
+
user_group_name
)
...
...
@@ -417,28 +465,48 @@ class ModelMonitor:
# 保存统计信息.
for
user_group_name
in
info_dict
:
tmp_dict
=
{
'
model_name
'
:
[
self
.
model_feild_name_dict
[
field
]],
'
group_name
'
:
[
user_group_name
]}
tmp_dict
=
{
'
模型名称
'
:
[
self
.
model_feild_name_dict
[
field
]],
'
客群名称
'
:
[
user_group_name
]}
for
m
in
info_dict
[
user_group_name
]:
tmp_dict
[
m
[
0
]
+
'm_num'
]
=
[
int
(
info_dict
[
user_group_name
][
m
][
'该月样本量'
])]
tmp_dict
[
m
[
0
]
+
'm_auc'
]
=
[
round
(
info_dict
[
user_group_name
][
m
][
'auc'
],
3
)]
tmp_dict
[
m
[
0
]
+
'月数量'
]
=
[
int
(
info_dict
[
user_group_name
][
m
][
'该月样本量'
])]
tmp_dict
[
m
[
0
]
+
'月AUC'
]
=
[
round
(
info_dict
[
user_group_name
][
m
][
'auc'
],
3
)]
tmp_dict
[
m
[
0
]
+
'月AUCR'
]
=
[
round
(
info_dict
[
user_group_name
][
m
][
'aucR'
],
3
)]
self
.
auc_info_df
=
self
.
auc_info_df
.
append
(
pd
.
DataFrame
(
tmp_dict
))
def abnormal_psi(self):
    """Flag the rows of ``self.psi_info_df`` whose PSI exceeds ``self.max_psi``.

    A row is marked abnormal when at least one column whose label contains
    the substring ``'PSI'`` holds a value strictly greater than
    ``self.max_psi``.  The boolean flag is written in place to the
    ``'是否异常'`` column of ``self.psi_info_df``; nothing is returned.

    Missing (NaN) cells compare as False and therefore never flag a row.
    """
    frame = self.psi_info_df
    # Only string labels can carry the 'PSI' marker; running the substring
    # test on any other label type would raise TypeError.
    psi_columns = [
        col for col in frame.columns
        if isinstance(col, str) and 'PSI' in col
    ]
    # A single vectorized comparison replaces the per-row Python loop.
    # `.values` assigns positionally, so an index with duplicate labels
    # cannot cause alignment problems.
    frame['是否异常'] = (frame[psi_columns] > self.max_psi).any(axis=1).values
def abnormal_auc(self):
    """Flag the rows of ``self.auc_info_df`` whose AUCR drops below ``self.min_aucr``.

    A row is marked abnormal when at least one column whose label contains
    the substring ``'AUCR'`` holds a value strictly less than
    ``self.min_aucr``.  Columns that only contain ``'AUC'`` are ignored.
    The boolean flag is written in place to the ``'是否异常'`` column of
    ``self.auc_info_df``; nothing is returned.

    Missing (NaN) cells compare as False and therefore never flag a row.
    """
    frame = self.auc_info_df
    # Only string labels can carry the 'AUCR' marker; running the substring
    # test on any other label type would raise TypeError.
    aucr_columns = [
        col for col in frame.columns
        if isinstance(col, str) and 'AUCR' in col
    ]
    # A single vectorized comparison replaces the per-row Python loop.
    # `.values` assigns positionally, so an index with duplicate labels
    # cannot cause alignment problems.
    frame['是否异常'] = (frame[aucr_columns] < self.min_aucr).any(axis=1).values
def
run
(
self
):
# 获取MySQL数据, 取
last_month往前num_month个月数据
.
# 获取MySQL数据, 取
近期num_month个月数据(如今天7.27, 则这27天算进7月)
.
self
.
mysql_df
=
self
.
sql_query
(
'''SELECT order_no, applied_at,
applied_type, applied_from, applied_channel, transacted, passdue_day
FROM risk_analysis
WHERE applied_at >
"2019-
%
s-01
00:00:00"
AND applied_at <
"2019-
%
s-01
00:00:00"'''
%
(
self
.
int2str
(
self
.
last_month
-
self
.
num_month
),
self
.
int2str
(
self
.
last_month
)
))
WHERE applied_at >
= "
%
s
00:00:00"
AND applied_at <
= "
%
s
00:00:00"'''
%
(
self
.
first_date
,
self
.
current_date
))
print
(
'MySQL数据获取成功.'
)
# self.mysql_df.to_csv('./mysql_data.csv', index=False)
# self.mysql_df = pd.read_csv('./mysql_data.csv')
# 获取MongoDB数据, 取
last_month往前num_month个月数据
.
condition
=
{
'wf_created_at'
:
{
'$gte'
:
'
2019-
%
s-01 00:00:00'
%
self
.
int2str
(
self
.
last_month
-
self
.
num_month
)
,
'$lte'
:
'
2019-
%
s-01 00:00:00'
%
self
.
int2str
(
self
.
last_month
)
}}
# 获取MongoDB数据, 取
近期num_month个月数据(如今天7.27, 则这27天算进7月)
.
condition
=
{
'wf_created_at'
:
{
'$gte'
:
'
%
s 00:00:00'
%
self
.
first_date
,
'$lte'
:
'
%
s 00:00:00'
%
self
.
current_date
}}
fields
=
{
'wf_biz_no'
:
1
,
'wf_created_at'
:
1
}
for
f
in
self
.
model_feild_list
:
# 加入Excel中预置的模型分名称
fields
[
f
]
=
1
...
...
@@ -456,8 +524,15 @@ class ModelMonitor:
# 拼接数据.
self
.
merge_data
=
pd
.
merge
(
left
=
self
.
mysql_df
,
right
=
self
.
mongo_df
,
left_on
=
'order_no'
,
right_on
=
'wf_biz_no'
,
how
=
'left'
)
## 定义逾期用户.
self
.
merge_data
[
'overdue'
]
=
self
.
merge_data
[
'passdue_day'
]
>
self
.
passdue_day
def
overdue
(
data
):
if
pd
.
isnull
(
data
):
return
np
.
nan
else
:
return
float
(
data
>
self
.
passdue_day
)
self
.
merge_data
[
'overdue'
]
=
self
.
merge_data
[
'passdue_day'
]
.
apply
(
overdue
)
# 清洗数据.
def
clean_data
(
data
):
...
...
@@ -465,6 +540,7 @@ class ModelMonitor:
return
float
(
data
)
except
:
return
np
.
nan
na_field_list
=
[]
for
field
in
self
.
model_feild_list
:
if
field
in
self
.
merge_data
.
columns
.
tolist
():
...
...
@@ -475,38 +551,51 @@ class ModelMonitor:
## 去除因为一些原因未抽取到的字段.
print
(
'不包含以下字段:'
)
for
field
in
na_field_list
:
print
(
self
.
model_feild_name_dict
[
field
])
self
.
model_feild_list
.
remove
(
field
)
self
.
model_name_list
.
remove
(
self
.
model_feild_name_dict
[
field
])
del
self
.
model_feild_name_dict
[
field
]
print
(
self
.
model_feild_name_dict
[
field
])
print
(
'数据拼接完成.'
)
# 数据按月划分.
self
.
merge_data
[
'month_label'
]
=
0
for
m
in
range
(
self
.
num_month
):
for
m
in
range
(
self
.
first_month
,
self
.
current_month
+
1
):
self
.
merge_data
.
loc
[
(
self
.
merge_data
[
'applied_at'
]
>
'2019-
%
s-01 00:00:00'
%
self
.
int2str
(
self
.
last_month
-
m
-
1
))
&
(
self
.
merge_data
[
'applied_at'
]
<
'2019-
%
s-01 00:00:00'
%
self
.
int2str
(
self
.
last_month
-
m
)),
'month_label'
]
=
self
.
last_month
-
m
-
1
(
self
.
merge_data
[
'applied_at'
]
>
=
'2019-
%
s-01 00:00:00'
%
self
.
int2str
(
m
))
&
(
self
.
merge_data
[
'applied_at'
]
<
'2019-
%
s-01 00:00:00'
%
self
.
int2str
(
m
+
1
)),
'month_label'
]
=
m
# 画图.
## AUC.
print
(
'开始画图-AUC.'
)
for
field
in
self
.
model_feild_list
:
self
.
plot_auc
(
field
)
## PSI.
print
(
'开始画图-PSI.'
)
for
field
in
self
.
model_feild_list
:
self
.
plot_psi
(
field
)
## AUC.
print
(
'开始画图-AUC.'
)
for
field
in
self
.
model_feild_list
:
self
.
plot_auc
(
field
)
# 输出数据不足的模型.
print
(
'PSI 数据不足以统计的模型:'
)
for
model_name
in
self
.
na_enough_data_psi_set
:
print
(
model_name
)
print
(
'='
*
40
)
print
(
'AUC 数据不足以统计的模型:'
)
for
model_name
in
self
.
na_enough_data_auc_set
:
print
(
model_name
)
print
(
'='
*
40
)
# 检测是否异常.
self
.
abnormal_psi
()
self
.
abnormal_auc
()
# 保存统计信息.
self
.
psi_info_df
.
to_csv
(
'./psi_info.csv'
,
index
=
False
)
self
.
auc_info_df
.
to_csv
(
'./auc_info.csv'
,
index
=
False
)
print
(
'统计信息保存成功.'
)
# if __name__ == '__main__':
# pass
# mm = ModelMonitor(excel_path='./model_score.xlsx', save_path='./image/', last_month=7, num_month=2)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment