Commit 9a3d42b6 authored by 王家华

V1.0

parent 78c9b003
.idea/misc.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (model_mvp)" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>
</project>
\ No newline at end of file
.idea/model_monitor.iml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
\ No newline at end of file
.idea/modules.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/model_monitor.iml" filepath="$PROJECT_DIR$/.idea/model_monitor.iml" />
</modules>
</component>
</project>
\ No newline at end of file
.idea/vcs.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
.idea/workspace.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="1ecd0b9f-60aa-441d-b8e6-0ca91e7a02ef" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py" beforeDir="false" afterPath="$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py" beforeDir="false" afterPath="$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/query_score.xlsx" beforeDir="false" afterPath="$PROJECT_DIR$/query_score.xlsx" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileEditorManager">
<leaf>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-2104">
<caret line="103" lean-forward="true" selection-start-line="103" selection-end-line="103" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/Monitor_mongoDB/psi_from_mongodb.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="238">
<caret line="14" column="33" lean-forward="true" selection-start-line="14" selection-start-column="33" selection-end-line="14" selection-end-column="33" />
<folding>
<element signature="e#50#79#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/Monitor_mongoDB/monitoring_VLM_mongodb.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="374">
<caret line="23" column="44" lean-forward="true" selection-start-line="23" selection-start-column="44" selection-end-line="23" selection-end-column="44" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="136">
<caret line="8" column="17" selection-start-line="8" selection-start-column="17" selection-end-line="8" selection-end-column="17" />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>E:</find>
<find>E:\</find>
</findStrings>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py" />
<option value="$PROJECT_DIR$/Monitor_mongoDB/psi_from_mongodb.py" />
<option value="$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py" />
<option value="$PROJECT_DIR$/Monitor_mongoDB/monitoring_VLM_mongodb.py" />
</list>
</option>
</component>
<component name="ProjectConfigurationFiles">
<option name="files">
<list>
<option value="$PROJECT_DIR$/.idea/model_monitor.iml" />
<option value="$PROJECT_DIR$/.idea/vcs.xml" />
<option value="$PROJECT_DIR$/.idea/misc.xml" />
<option value="$PROJECT_DIR$/.idea/modules.xml" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="312" />
<option name="y" value="137" />
<option name="width" value="1228" />
<option name="height" value="675" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="model_monitor" type="b2602c69:ProjectViewProjectNode" />
<item name="model_monitor" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="model_monitor" type="b2602c69:ProjectViewProjectNode" />
<item name="model_monitor" type="462c0819:PsiDirectoryNode" />
<item name="Monitor_mongoDB" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="model_monitor" type="b2602c69:ProjectViewProjectNode" />
<item name="model_monitor" type="462c0819:PsiDirectoryNode" />
<item name="Monitor_risk_analysis" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
</panes>
</component>
<component name="PropertiesComponent">
<property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="1ecd0b9f-60aa-441d-b8e6-0ca91e7a02ef" name="Default Changelist" comment="" />
<created>1562726148779</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1562726148779</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="1936" height="1066" extended-state="6" />
<editor active="true" />
<layout>
<window_info id="Favorites" side_tool="true" />
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.1564805" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info anchor="bottom" id="Version Control" />
<window_info anchor="bottom" id="Python Console" />
<window_info anchor="bottom" id="Terminal" />
<window_info anchor="bottom" id="Event Log" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
</layout>
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-2104">
<caret line="103" lean-forward="true" selection-start-line="103" selection-end-line="103" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="136">
<caret line="8" column="17" selection-start-line="8" selection-start-column="17" selection-end-line="8" selection-end-column="17" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/Monitor_mongoDB/psi_from_mongodb.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="238">
<caret line="14" column="33" lean-forward="true" selection-start-line="14" selection-start-column="33" selection-end-line="14" selection-end-column="33" />
<folding>
<element signature="e#50#79#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/Monitor_mongoDB/monitoring_VLM_mongodb.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="374">
<caret line="23" column="44" lean-forward="true" selection-start-line="23" selection-start-column="44" selection-end-line="23" selection-end-column="44" />
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
Monitor_mongoDB/monitoring_VLM_mongodb.py
#!/usr/bin/env python
# coding: utf-8
import datetime
import time

import numpy as np
import pandas as pd
import pymongo
import pymysql
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from dateutil.relativedelta import relativedelta

pd.options.display.max_columns = 1000
# KA channel list (used for the per-channel drill-down)
kalist = [1,198,214,217,333,159507,159384,159478,459483,159563,159561,159538,159609]
risk_analysis_config = {'user' : 'fengkong_read_only',
'password' : 'mT2HFUgI',
'host' : '172.20.6.9',
'port' : 9030,
'database' : 'risk_analysis',
'encoding' : 'utf8'}
# read mongodb mapping from excel
#mapping_score = pd.read_excel("./mongodb.xlsx",sheet_name='score').dropna(axis=0)
#mapping_variable = pd.read_excel("./mongodb.xlsx",sheet_name='variable').dropna(axis=0)
def readExcel(path,sheet=None):
return pd.read_excel(path,sheet)
dict_DD = readExcel("../features_mongodb.xlsx")
modelList = list(dict_DD.keys())
limit = "{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
query = "{'order_id':1,'@key':1,'_id':0}"
vlm_start_date = (datetime.date.today() - relativedelta(days = +57)).strftime("%Y-%m-01 00:00:00")
vlm_end_date = time.strftime("%Y-%m-01 00:00:00")
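# Observation window: from the 1st of the month ~57 days back, up to (but not
# including) the 1st of the current month, i.e. roughly the last two calendar months.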
# extract the distinct channel list, excluding recall channels
sql_channel = '''
SELECT DISTINCT(applied_from),applied_channel FROM risk_analysis
WHERE transacted = 1
AND real_loan_amount > 20000
AND loan_start_date >= DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -1 MONTH),'%Y-%m-01')
AND loan_start_date < DATE_FORMAT(NOW(),'%Y-%m-01')
and applied_from not in (159481,159486,159528)
'''
sql = '''
SELECT date_format(applied_at,'%Y-%m-%d') as applied_at,applied_from,applied_type,order_no FROM risk_analysis
WHERE DATE_FORMAT(applied_at,'%Y-%m') BETWEEN DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -2 MONTH),'%Y-%m')
AND DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -1 MONTH),'%Y-%m')
AND applied_from in (@applied_channel)
AND applied = 1
'''
path_alarm = "../plot/VLM/alarm/"
path = "../plot/VLM/"
def querymongo(limit,query):
myclient = pymongo.MongoClient("mongodb://rc_dp_feature_user:qgrcdpfeature_2019@172.20.1.150:20000/?authSource=rc_dp_feature_pro")
mydb = myclient["rc_dp_feature_pro"]
mycol = mydb["rc_feature_analysis_timing_v2"]
# Mongo operators: $gte = greater than or equal, $lt = less than
x = mycol.find(eval(limit),eval(query))
myclient.close()
return pd.DataFrame(list(x))
def connect2DB(db_config):
db = pymysql.connect(
host = db_config['host'],
port = db_config['port'],
user = db_config['user'],
passwd = db_config['password'],
db = db_config['database'],
charset = db_config['encoding'])
return db
def query_sql(sql,db=risk_analysis_config):
try:
conn = connect2DB(db)
df = pd.read_sql(sql,conn)
conn.close()
return df
except Exception as e:
print('query_sql exception:', e)
return 0
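# NOTE: query_sql returns 0 (not a DataFrame) on failure, so callers such as
# query_sql(sql_channel).applied_from below will raise immediately if the DB is down.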
# VLM with one variable
def plotLine(title,y,row,col,table,save_path,upperBoundary=0,bottomBoundary=0):
# skip the plot when there are fewer than ~10 points or the series is nearly empty
if (len(col) <= 10) or (int(y.sum()) <= 10):
return 1
else:
cols = [item[5:] for item in col.values.tolist()]
#print(content)
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['savefig.dpi'] = 226 # saved-image DPI (resolution)
fig,axs = plt.subplots(1,1,figsize=(33,11),linewidth=0.1)
x = range(len(col))
axs.plot(x,y)
axs.add_line(Line2D((x[0],x[-1]),(y.mean(),y.mean()),linestyle='--',color='darkorange'))
plt.annotate(s = '月均{}'.format(round(y.mean(),2)),xy=(x[-1] + 0.1,y.mean()))
# upper boundary
if upperBoundary == 0:
axs.add_line(Line2D((x[0],x[-1]),(y.mean() + 3 * y.std(),y.mean() + 3 * y.std()),linestyle = '--',color = 'lightcoral'))
plt.annotate(s = 'Mean+3STD\n{}'.format(round(y.mean() + 3 * y.std(),2)),xy = (x[-1] + 0.1,y.mean() + 3 * y.std()))
else:
axs.add_line(Line2D((x[0],x[-1]),(upperBoundary,upperBoundary),linestyle = '--',color = 'lightcoral'))
plt.annotate(s = 'Mean+3STD\n{}'.format(round(upperBoundary,2)),xy = (x[-1] + 0.1,upperBoundary))
# bottom boundary
if bottomBoundary == 0:
axs.add_line(Line2D((x[0],x[-1]),(y.mean() - 3 * y.std(),y.mean() - 3 * y.std()),linestyle = '--',color = 'lightcoral'))
plt.annotate(s = 'Mean-3STD\n{}'.format(round(y.mean() - 3 * y.std(),2)),xy = (x[-1] + 0.1,y.mean() - 3 * y.std()))
else:
axs.add_line(Line2D((x[0],x[-1]),(bottomBoundary,bottomBoundary),linestyle = '--',color = 'lightcoral'))
plt.annotate(s = 'Mean-3STD\n{}'.format(round(bottomBoundary,2)),xy = (x[-1] + 0.1,bottomBoundary))
# draw vertical line of each points
bottom = 0
if y.min() - y.std() * 3 - y.mean() * 0.02 > 0:
bottom = y.min() - y.std() * 3 - y.std() * 0.1
plt.vlines(x,[bottom],y,color = 'lightgrey',linestyle = '--')
axs.grid()
plt.xticks([])
the_table = plt.table(cellText=table,
rowLabels=row,
colLabels=cols,
colWidths=[0.91 / (len(col) - 1)]*len(col),
loc='bottom')
the_table.auto_set_font_size(False)
the_table.set_fontsize(9)
fig.subplots_adjust(left=0.032,right=0.97)
fig.set_size_inches(33,11)
#fig.suptitle(title)
plt.title(title,fontsize=18)
plt.savefig(save_path + title + ".png")
plt.show()
return 1
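# The alarm rule used throughout is a 3-sigma control band: a series "breaches"
# when any of its recent points leaves the mean +/- 3*std band. A minimal,
# self-contained sketch of that rule (hypothetical helper, not used below):
def _breaches_3sigma(series, window=30):
    upper = series.mean() + 3 * series.std()
    lower = series.mean() - 3 * series.std()
    recent = series.iloc[-window:]
    return bool((recent.max() > upper) | (recent.min() < lower))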
############################################
def dataManipul(df,keyword):
#df_withoutna = df.dropna(axis=0).sort_values(by=keyword,ascending=False).reset_index().drop('index',axis=1)
#df = pd.merge(df_withoutna[keyword].iloc[int(len(df_withoutna)*0.01):int(len(df_withoutna)*0.99)]
#df.dropna(axis=0).loc[:,keyword] = df.dropna(axis=0)[keyword].map(lambda x : np.nan if x < 0 else x )
df_count = df[['applied_at',keyword]].groupby('applied_at').count()[keyword] # TODO: recheck this count
df_zeros = pd.Series(np.zeros(df_count.shape),index = df_count.index)
df_missing = df[df[keyword].isnull()].fillna(0).groupby('applied_at')[keyword].count()
df_missing = pd.concat([df_zeros,df_missing], axis = 1, sort = True).fillna(0)[keyword]
# df_missing = df[df[keyword].isnull()].fillna(0).groupby('applied_at')[keyword].count()
# df_missing = df_shape + df_missing
missing_rate = df_missing / (df_count + df_missing) * 100
del df_missing
df_zero = df[df[keyword] == 0].groupby('applied_at')[keyword].count()
df_zero = pd.concat([df_zeros,df_zero], axis = 1, sort = True).fillna(0)[keyword]
zero_rate = df_zero / df_count * 100
del df_zero
df_noneNA = df.dropna(axis = 0)
df_noneNA = df_noneNA.sort_values(by=keyword,ascending=False).reset_index().drop('index',axis=1)
df_sum = df_noneNA.iloc[int(len(df_noneNA)*0.01):int(len(df_noneNA)*0.99)].groupby('applied_at').agg(['mean','std','count'])
df_sum = pd.concat([df_zeros,df_sum], axis = 1, sort = True).fillna(0).drop(columns=[0])
df_sum.columns = ['mean','std','count']
cols = df_count.index
return zero_rate.fillna(0).round(1),missing_rate.fillna(0).round(1),cols,df_sum
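# dataManipul returns, per applied_at day: the zero-value rate (%), the missing
# rate (%), the day labels, and mean/std/count computed after trimming the top
# and bottom 1% (by value) of the non-null sample so outliers do not move the band.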
#############################################
# check via channel details
def separateBychannel(df,key,meansub3std,meanpls3std):
try:
for appliedFrom in kalist:
try:
#df.applied_from = df.applied_from.astype('str')
zero_rate_total,missing_rate_total,cols_total,df_sum = dataManipul(df[df.applied_from == appliedFrom][['applied_at',key]],key)
table = []
y_total = df_sum['mean']
table.append(df_sum['mean'].round(1)) #.round(1).values.tolist()
table.append(df_sum['count'].astype('int'))
table.append(missing_rate_total.astype('str')+'%')
table.append(zero_rate_total.astype('str')+'%')
if (y_total.iloc[-30:].max() > meanpls3std) | (y_total.iloc[-30:].min() < meansub3std):
plotLine(str(model) + '-' + description[fea_i]+'-Mean-'+appliedType_type[appliedType_index]+'with'+str(appliedFrom)+'-VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,'./plot/vlm_separate_By_channel/')
del table
except ValueError as e: #ValueError
continue
except Exception as e :
print('channel Exception : ',key,appliedType,e)
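# separateBychannel re-runs the same mean +/- 3*std check for each KA channel,
# so an aggregate-level breach can be localized to the channel that caused it.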
########### extract channel list #############
applied_channel = query_sql(sql_channel).applied_from.tolist()
sql = sql.replace('@applied_channel',str(applied_channel).strip('[').strip(']'))
#########################################################################
#########################################################################
for model in modelList:
#feature key list
features = dict_DD[model].feature
# query key list
queries = dict_DD[model].queries
#feature descriptions list
description = dict_DD[model].description
appliedTypeList = ['1,2,3','1','2','3']
appliedType_type = ['客群总体','首申','复申','复贷']
for fea_i in range(len(queries)):
appliedType_index = 0
key = queries[fea_i].strip()
df = querymongo(limit.replace('@start_date',vlm_start_date).replace('@end_date',vlm_end_date),query.replace('@key',key))
df = df.applymap(lambda x : np.nan if x == '' else x)
df_offline = query_sql(sql,risk_analysis_config)
df = pd.merge(df,df_offline,how='right',left_on='order_id',right_on='order_no')[['applied_at','applied_from','applied_type',key]]
del df_offline
df[key] = df[key].astype('float')
df.applied_type = df['applied_type'].astype('int')
for appliedType in appliedTypeList:
if appliedType_index == 0:
df_tmp = df[['applied_at','applied_from',key]]
else:
df_tmp = df[df.applied_type == int(appliedType)][['applied_at','applied_from',key]]
try:
#df.applied_from = df.applied_from.astype('str')
zero_rate_total,missing_rate_total,cols_total,df_sum = dataManipul(df_tmp[['applied_at',key]],key)
table = []
y_total = df_sum['mean']
table.append(df_sum['mean'].round(1)) #.round(1).values.tolist()
table.append(df_sum['count'].astype('int'))
table.append(missing_rate_total.astype('str')+'%')
table.append(zero_rate_total.astype('str')+'%')
meanpls3std = y_total.mean() + y_total.std() * 3
meansub3std = y_total.mean() - y_total.std() * 3
#mean_mean = y_total.mean()
if (y_total.iloc[-30:].max() > meanpls3std) | (y_total.iloc[-30:].min() < meansub3std):
plotLine(model+'-'+description[fea_i]+'-Mean-'+appliedType_type[appliedType_index]+'-变化VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path_alarm)
separateBychannel(df_tmp,key,meansub3std,meanpls3std)
else:
plotLine(model+'-'+description[fea_i]+'-Mean-'+appliedType_type[appliedType_index]+'-变化VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path)
del table
except Exception as e: #ValueError
print('Mean Exception : ',key,appliedType,e)
appliedType_index += 1
continue
try:
zero_rate_total,missing_rate_total,cols_total,df_sum = dataManipul(df_tmp[['applied_at',key]],key)
table = []
y_total = df_sum['std']
table.append(df_sum['std'].round(1))
table.append(df_sum['count'])
table.append(missing_rate_total.astype('str')+'%')
table.append(zero_rate_total.astype('str')+'%')
del df_sum
stdpls3std = y_total.mean() + y_total.std() * 3
stdsub3std = y_total.mean() - y_total.std() * 3
#std_mean = y_total.mean()
if (y_total.iloc[-30:-1].max() > stdpls3std) | (y_total.iloc[-30:-1].min() < stdsub3std):
plotLine(model+'-'+description[fea_i]+'-Std-'+appliedType_type[appliedType_index]+'-变化VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path_alarm)
else:
plotLine(model+'-'+description[fea_i]+'-Std-'+appliedType_type[appliedType_index]+'-变化VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path)
del table
except Exception as e:
print('Std Exception : ',e)
appliedType_index += 1
continue
appliedType_index += 1
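# Routing: a mean/std series that breaches its 3-sigma band is saved under
# path_alarm (with a per-channel drill-down for mean breaches); otherwise the
# chart is saved under the regular path directory.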
Monitor_mongoDB/psi_from_mongodb.py
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import numpy as np
import pandas as pd
import pymongo
import pymysql
import matplotlib.pyplot as plt
import sklearn.metrics

pd.options.display.max_columns = 1000
# read mongodb mapping from excel
mapping_score = pd.read_excel("./query_score.xlsx",sheet_name='score_mongo').dropna(axis=0)
#mapping_variable = pd.read_excel("./mongodb.xlsx",sheet_name='variable').dropna(axis=0)
limit = "{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
query = "{'order_id':1,'@key':1}"
passdue_day = 15
appliedType_type = {'1,2,3':'总体','1,2':'首贷','1':'首申','2':'复申','3':'复贷'}
path = "../plot/PSI_VAL/"
################################### plot PSI ##################################
def plotPSI(title,y_list,dateList,psi,missing,rows,cols,table_value,save_path):
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['savefig.dpi'] = 226 # saved-image DPI
plt.rcParams['figure.dpi'] = 100 # on-screen figure DPI
fig,axs = plt.subplots(1,1,figsize=(16,9),linewidth = 0.1)
for y_index in range(len(y_list)):
y = y_list[y_index]
x = range(len(y))
axs.plot(x,y,marker='o',label=dateList[y_index][0:7] + ' PSI:'+str(psi[y_index])+'\n缺失率:'+str(missing[y_index])+'%')
the_table = plt.table(cellText=table_value,
rowLabels=rows,
colLabels=cols,
colWidths=[0.91 / (len(cols)-1)] * len(cols),
loc='bottom')
the_table.auto_set_font_size(False)
the_table.set_fontsize(8)
fig.subplots_adjust(bottom=0.2)
plt.grid()
plt.ylabel('各分段样本占比'+' (%)')
plt.legend()
plt.xticks([])
#plt.vlines(range(len(cols)),[0],y,color='lightgrey',linestyle='--')
fig.suptitle(title)
plt.savefig(save_path + title + ".png")
plt.show()
return 1
# draw liftchart
def liftchart(df,keyword,interval):
# split bins with scores
#nothing,interval = pd.qcut(df[df.loc[:,keyword]>0][keyword],10,retbins=True,duplicates='drop')
# the 'nothing' placeholder from qcut is unused; only the bin edges are needed
if len(df[df.loc[:,keyword]<0][keyword])>0:
bins_interval = interval.tolist()
bins_interval.append(-10000000)
bins_interval.sort()
else:
bins_interval = interval
df.loc[:,'bins'] = pd.cut(df[keyword],bins_interval,precision=6)
# count of sample
df_count = df[['applied_at','bins','overdue']].groupby(['applied_at','bins']).count()
df_zeros = pd.Series(np.zeros(df_count['overdue'].shape),index = df_count.index)
# overdue samples
df = df[df.overdue == 1]
#df.loc[:,'bins'] = pd.cut(df[keyword],interval)
df_overdue = df[['applied_at','bins','overdue']].groupby(['applied_at','bins']).count()
df_overdue = pd.concat([df_zeros,df_overdue],axis=1)['overdue'].fillna(0)
y = df_overdue / df_count['overdue'].replace(0,1) * 100
rows = y.index.levels[0].tolist()
cols = df['bins'].value_counts().sort_index().index.astype('str').tolist()
return df_count['overdue'],df_overdue,y.round(3),rows,cols
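# liftchart returns, per (month, score bin): the sample count, overdue count
# and overdue rate (%), plus month labels (rows) and bin labels (cols); a score
# that still ranks risk shows a roughly monotone overdue rate across the bins.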
############################## validation liftchart###############################
def plotLiftChart(title,y_list,dateList,aucri,auc,rows,cols,table_value,save_path):
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['savefig.dpi'] = 226 # saved-image DPI
plt.rcParams['figure.dpi'] = 100 # on-screen figure DPI
fig,axs = plt.subplots(1,1,figsize=(16,9),linewidth=0.1)
for y_index in range(len(y_list)):
y = y_list[y_index]
x = range(len(y))
axs.plot(x,y,marker='o',label=dateList[y_index][0:7] + ' (AUCRI:' + str(aucri[y_index])+ ') AUC: ' + str(auc[y_index]))
the_table = plt.table(cellText=table_value,
rowLabels = rows,
colLabels = cols,
colWidths = [0.91 / (len(cols)-1)] * len(cols),
loc = 'bottom')
the_table.auto_set_font_size(False)
the_table.set_fontsize(8)
fig.subplots_adjust(bottom = 0.2)
plt.legend()
plt.grid()
plt.ylabel('贷后首逾'+str(15)+'+ (%)')
plt.xticks([])
fig.suptitle(title)
plt.savefig(save_path + title + ".png")
plt.show()
return 1
def psi_bins(df,keyword,interval):
df.loc[:,'bins'] = pd.cut(df[keyword],interval,precision=6)
BM = df.groupby('bins').count()[keyword]
BM_count = BM / BM.values.sum() * 100
return BM_count
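# PSI compares each observation month's per-bin percentage distribution with
# the benchmark's. A self-contained sketch of the computation used below; note
# it uses log10 rather than the usual natural log, which scales the statistic
# down by ln(10) ~ 2.3, so the common 0.1 / 0.25 thresholds do not apply as-is.
_expected = np.array([10.0] * 10)                                  # benchmark, % per decile
_actual = np.array([8, 9, 10, 11, 12, 10, 10, 10, 10, 10], float)  # observed, %
_psi = ((_actual - _expected) * np.log10(_actual / _expected)).sum() / 100  # ~0.0044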
def querymongo(limit,query):
myclient = pymongo.MongoClient("mongodb://rc_dp_feature_user:qgrcdpfeature_2019@172.20.1.150:20000/?authSource=rc_dp_feature_pro")
mydb = myclient["rc_dp_feature_pro"]
mycol = mydb["rc_feature_analysis_timing_v2"]
x = mycol.find(eval(limit),eval(query))
myclient.close()
return pd.DataFrame(list(x))
benchmark_start_date = "2018-12-21 00:00:00"
benchmark_end_date = "2019-01-22 00:00:00"
psi_start_date = "2019-03-01 00:00:00"
psi_end_date = "2019-06-01 00:00:00"
val_start_date = "2019-02-03 00:00:00"
val_end_date = "2019-05-03 00:00:00"
risk_analysis_config = {'user' : 'fengkong_read_only',
'password' : 'mT2HFUgI',
'host' : '172.20.6.9',
'port' : 9030,
'database' : 'risk_analysis',
'encoding' : 'utf8'}
def connect2DB(db_config):
db = pymysql.connect(
host = db_config['host'],
port = db_config['port'],
user = db_config['user'],
passwd = db_config['password'],
db = db_config['database'],
charset = db_config['encoding'])
return db
def query_sql(sql,db=risk_analysis_config):
try:
conn = connect2DB(db)
df = pd.read_sql(sql,conn)
conn.close()
return df
except Exception as e:
return 0
def dataManipul(df,keyword,interval):
# df count of all records
# missing_rate = {}
# df_count = df[['applied_at','bins']].groupby('applied_at')
# count dataframe separated by mon
# set negative as null
df[keyword] = df[keyword].map(lambda x : np.nan if x < 0 else x)
df_noneNA = df.dropna(axis = 0)
df_count = df[['applied_at',keyword]].fillna(0).groupby('applied_at').count()
df_zeros = pd.Series(np.zeros(df_count[keyword].shape),index = df_count.index)
df_missing = df_count - df_noneNA[['applied_at',keyword]].groupby('applied_at').count()
df_missing = pd.concat([df_zeros,df_missing],axis=1)[keyword].fillna(0)
missing_rate = df_missing / df_count[keyword].replace(0,1) * 100
df_noneNA['bins'] = pd.cut(df_noneNA[keyword],interval,precision=6)
cols = df_noneNA['bins'].value_counts().sort_index().index.astype('str')
df_count = df_noneNA[['applied_at','bins',keyword]].groupby(['applied_at','bins']).count()
df_zeros = pd.Series(np.zeros(df_count[keyword].shape),index = df_count.index)
df_zero = df_noneNA[df_noneNA[keyword] == 0][['applied_at','bins',keyword]].groupby(['applied_at','bins']).count()
df_zero = pd.concat([df_zeros,df_zero],axis=1)[keyword].fillna(0)
zero_rate = df_zero / df_count[keyword].replace(0,1) * 100
y = df_count / df_noneNA[['applied_at',keyword]].groupby('applied_at').count() * 100
rows = y.index.levels[0].tolist()
return zero_rate.round(1),missing_rate.round(1),rows,cols,y[keyword].round(1),df_count[keyword]
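# This dataManipul variant returns, per month: zero/missing rates, month (rows)
# and bin (cols) labels, y = the % of the month's non-null sample in each score
# bin (the distribution PSI compares against the benchmark), and the bin counts.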
sql_bins = '''
SELECT order_no,transacted,IF(passdue_day>15,1,0) as overdue
FROM risk_analysis
WHERE applied_at >= '@benchmark_start_date' and applied_at <= '@benchmark_end_date'
AND applied_from IN (@channelID)
AND applied_type IN (@appliedType)
AND repayment_status != 4
'''
sql_observation = '''
SELECT order_no,date_format(applied_at,'%Y-%m') as applied_at
FROM risk_analysis
WHERE applied_at >= '@psi_start_date' and applied_at <= '@psi_end_date'
AND applied_from IN (@channelID)
AND applied_type IN (@appliedType)
AND repayment_status != 4
'''
######## calculate with natural mon ###########
"""
sql_passdueday = '''
SELECT order_no,date_format(loan_start_date,'%Y-%m') as applied_at,IF(passdue_day > @passdue_day,1,0) as overdue
FROM risk_analysis
WHERE applied_at >= '@val_start_date' and applied_at <= '@val_end_date'
AND applied_from IN (@channelID)
AND applied_type IN (@appliedType)
AND transacted = 1
AND repayment_status != 4
'''
"""
######## calculate with T-n mon ###########
sql_passdueday = '''
(SELECT order_no,'T-1' as applied_at,IF(passdue_day > @passdue_day,1,0) as overdue
FROM risk_analysis
WHERE DATE_FORMAT(deadline,'%Y-%m-%d') >= DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -45 DAY),'%Y-%m-%d') and DATE_FORMAT(deadline,'%Y-%m-%d') < DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -15 DAY),'%Y-%m-%d')
AND applied_from IN (@channelID)
AND applied_type IN (@appliedType)
AND transacted = 1
AND repayment_status != 4)
UNION ALL
(SELECT order_no,'T-2' as applied_at,IF(passdue_day > @passdue_day,1,0) as overdue
FROM risk_analysis
WHERE DATE_FORMAT(deadline,'%Y-%m-%d') >= DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -75 DAY),'%Y-%m-%d') and DATE_FORMAT(deadline,'%Y-%m-%d') < DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -45 DAY),'%Y-%m-%d')
AND applied_from IN (@channelID)
AND applied_type IN (@appliedType)
AND transacted = 1
AND repayment_status != 4)
UNION ALL
(SELECT order_no,'T-3' as applied_at,IF(passdue_day > @passdue_day,1,0) as overdue
FROM risk_analysis
WHERE DATE_FORMAT(deadline,'%Y-%m-%d') >= DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -105 DAY),'%Y-%m-%d') and DATE_FORMAT(deadline,'%Y-%m-%d') < DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -75 DAY),'%Y-%m-%d')
AND applied_from IN (@channelID)
AND applied_type IN (@appliedType)
AND transacted = 1
AND repayment_status != 4)
'''
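# The T-1/T-2/T-3 cohorts slice matured loans by repayment deadline (15-45,
# 45-75 and 75-105 days ago respectively), so each cohort has had at least
# passdue_day (15) days in which a first overdue could be observed.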
# benchmark
#df = querymongo(limit.replace('@start_date',benchmark_start_date).replace('@end_date',benchmark_end_date),query.replace('@key',key))[['order_id',key]]
modelType = mapping_score.description.tolist()
modelList = mapping_score.score.tolist()
appliedTypeList = mapping_score.appliedType.tolist()
#channelIDList = mapping_score.channel.tolist()
conn = connect2DB(risk_analysis_config)
# extract channel list where except recalling channel
sql_channel = '''
SELECT DISTINCT(applied_from),applied_channel
FROM risk_analysis
WHERE applied_from IN
(SELECT applied_from FROM risk_analysis
WHERE transacted = 1
AND loan_start_date >= DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -1 MONTH),'%Y-%m-01')
AND loan_start_date < DATE_FORMAT(NOW(),'%Y-%m-01')
and applied_from not in (159481,159486,159528)
GROUP BY 1
HAVING SUM(real_loan_amount) > 100000
ORDER BY sum(real_loan_amount) DESC)
'''
channel = {'1,214,217,198':'内部','159507':'浅橙','159537':'360金融','333':'融360','159384,159483':'平安','159561':'51公积金API'}
channelId = pd.read_sql(sql_channel,conn).applied_from
# flatten the channel-id keys ('1,214,...') into one list of ints
known_ids = [int(i) for key in channel.keys() for i in key.split(',')]
channel[str(channelId[~channelId.isin(known_ids)].tolist()).strip('[').strip(']')] = '其他渠道'
channel[str(channelId.tolist()).strip('[').strip(']')] = '全部渠道'
for modelVar in modelList:
for appliedType in str(appliedTypeList[modelList.index(modelVar)]).split(';'):
print('appliedType',appliedType)
print('appliedTypeList[model_index]',appliedTypeList[modelList.index(modelVar)])
for channelID in channel.keys():
try:
df_bins = querymongo(limit.replace('@start_date',benchmark_start_date).replace('@end_date',benchmark_end_date),query.replace('@key',modelVar))[['order_id',modelVar]]
df_bins = df_bins.applymap(lambda x : np.nan if x == '' else x)
df_bins[modelVar] = df_bins[modelVar].astype('float')
df_offline = query_sql(sql_bins.replace('@appliedType',appliedType).replace('@channelID',channelID).replace('@benchmark_start_date',benchmark_start_date).replace('@benchmark_end_date',benchmark_end_date),risk_analysis_config)
df_bins = pd.merge(df_bins,df_offline,how='right',left_on='order_id',right_on='order_no')[['transacted','overdue',modelVar]].dropna(axis=0)
del df_offline
df_observation = querymongo(limit.replace('@start_date',psi_start_date).replace('@end_date',psi_end_date),query.replace('@key',modelVar))[['order_id',modelVar]]
df_observation = df_observation.applymap(lambda x : np.nan if x == '' else x)
df_observation[modelVar] = df_observation[modelVar].astype('float')
df_offline = query_sql(sql_observation.replace('@appliedType',appliedType).replace('@channelID',channelID).replace('@psi_start_date',psi_start_date).replace('@psi_end_date',psi_end_date),risk_analysis_config)
df_observation = pd.merge(df_observation,df_offline,how='right',left_on='order_id',right_on='order_no')[['applied_at',modelVar]]
del df_offline
#df_observation = query_sql(sql_observation.replace('@appliedType',appliedType).replace('@channelID',channelID),risk_analysis_config)
df_observation.loc[:,modelVar] = df_observation.loc[:,modelVar].map(lambda x : np.nan if x < 0 else x)
#df_bins = df_bins.apply(lambda x :np.nan if x < 0 else x)
Nothing,interval = pd.qcut(df_bins.loc[:,modelVar],10,retbins=True,precision=6,duplicates='drop')
interval[0] = 0
del Nothing
BM_count = psi_bins(df_bins,modelVar,interval)
zero_rate,missing_rate,dateList,cols,y,count = dataManipul(df_observation,modelVar,np.array(interval).round(6))
#df_observation_with_bin = pd.cut(df_observation.dropna(axis=0)[modelVar],interval)
# del df_bins
del interval
value_tab = []
rows = []
y_list = []
psi = []
# plot line separated by mon
for mon in dateList:
y_list.append(y.loc[mon].values)
#value_tab.append(y.loc[mon].astype('str')+'%')
value_tab.append(count.loc[mon].astype('str')+'(zeroR:'+zero_rate.loc[mon].astype('str')+'%)')
#rows.append(str(mon)+' Value');
rows.append(str(mon)+' Count')
#(y-10).sum() / np.log10(y/10)
psi.append((((y.loc[mon]-BM_count) * np.log10(y.loc[mon]/BM_count)).sum()/100).round(3))
plotPSI(modelType[modelList.index(modelVar)]+'-'+appliedType_type[appliedType]+'-' + channel[channelID] + ' PSI',y_list,dateList,psi,missing_rate,rows,cols,value_tab,path)
except Exception as e:
print("psi exception",e)
try:
# Overdue dataframe
df_bins_auc = df_bins[df_bins.transacted == 1]
del df_bins
auc_BM = sklearn.metrics.roc_auc_score(df_bins_auc.overdue, df_bins_auc.loc[:,modelVar])
print('AUC_BM: ',auc_BM)
Nothing,interval = pd.qcut(df_bins_auc.loc[:,modelVar],10,retbins=True,precision=6,duplicates='drop')
interval[0] = 0
del Nothing
df_passdueday = querymongo(limit.replace('@start_date',val_start_date).replace('@end_date',val_end_date),query.replace('@key',modelVar))[['order_id',modelVar]]
df_passdueday = df_passdueday.applymap(lambda x : np.nan if x == '' else x)
df_passdueday[modelVar] = df_passdueday[modelVar].astype('float')
df_offline = query_sql(sql_passdueday.replace('@appliedType',appliedType).replace('@channelID',channelID).replace('@val_start_date',val_start_date).replace('@val_end_date',val_end_date).replace('@passdue_day',str(passdue_day)),risk_analysis_config)
df_passdueday = pd.merge(df_passdueday,df_offline,how='inner',left_on='order_id',right_on='order_no')[['applied_at','overdue',modelVar]].dropna(axis=0)
#print('df_passdueday count: ',df_passdueday.shape)
del df_offline
# df_passdueday = pd.read_sql(sql_passdueday.replace('@modelVar',modelVar).replace('@appliedType',appliedType).replace('@channelID',channelID).replace('@passdueday',str(passdueday)),conn)
count,df_overdue,y,dateList,cols = liftchart(df_passdueday,modelVar,np.array(interval).round(6))
value_tab = []
rows = []
y_list = []
aucri = []
auc = []
for mon in dateList:
y_list.append(y.loc[mon].values)
#value_tab.append(y.loc[mon].astype('str')+'%')
value_tab.append(df_overdue.loc[mon].astype('str') + ' (总计 ' + count.loc[mon].astype('str') + ')' )
#rows.append(str(mon)+' OverdueRate');
rows.append(str(mon)+' Count')
aucri.append(round((sklearn.metrics.roc_auc_score(df_passdueday[df_passdueday.applied_at==mon].overdue, df_passdueday[df_passdueday.applied_at==mon].loc[:,modelVar])/auc_BM),3))
auc.append(round(sklearn.metrics.roc_auc_score(df_passdueday[df_passdueday.applied_at==mon].overdue, df_passdueday[df_passdueday.applied_at==mon].loc[:,modelVar]),3))
auc[-1] = str(auc[-1]) + '\n AUC基准: ' + str(round(auc_BM,3))
plotLiftChart(modelType[modelList.index(modelVar)] + '-' + appliedType_type[appliedType] + '-' + channel[channelID] + ' AUC WITH '+ str(15) + '+',y_list,dateList,aucri,auc,rows,cols,value_tab,path)
except Exception as e:
print("val exception",e)
# In[ ]:
Monitor_risk_analysis/Monitor_VLM_riskanalysi.py
@@ -5,42 +5,39 @@ Created on Mon Nov 26 21:44:56 2018
@author: Jason Wang
"""
import time
import os
import pymysql
import pandas as pd
import numpy as np
import openpyxl
import decimal
import matplotlib.pyplot as plt
import os
from matplotlib.font_manager import FontProperties
from matplotlib.lines import Line2D
import datetime
from django.db import transaction, DatabaseError
kalist = [1,198,214,217,333,159507,159384,159563,159561,159538,159609,159537]
kalist = [1, 198, 214, 217, 333, 159507, 159384, 159563, 159561, 159538, 159609, 159537]
############################## SQL ##############################################
#applied_channel = [1,214,217,198,159384,159483,159479,159478,333,158748,158764,158932,159457,159459,159519,159507,159538,159561]
#applice_type = []
#channelDict = {159384:'平安H5高净值',159483:'平安低净值',159479:'车险保单贷',159478:'法人贷',333:'融360',158748:'汽车之家',158764:'翼支付',158932:'拉卡拉',159457:'惠金所',159459:'惠金所',159519:'亿融普惠'}
appliedTypeList = ['1,2,3','1','2','3']
appliedType_type = ['客群总体','首申','复申','复贷']
# applied_channel = [1,214,217,198,159384,159483,159479,159478,333,158748,158764,158932,159457,159459,159519,159507,159538,159561]
# applice_type = []
# channelDict = {159384:'平安H5高净值',159483:'平安低净值',159479:'车险保单贷',159478:'法人贷',333:'融360',158748:'汽车之家',158764:'翼支付',158932:'拉卡拉',159457:'惠金所',159459:'惠金所',159519:'亿融普惠'}
appliedTypeList = ['1,2,3', '1', '2', '3']
appliedType_type = ['客群总体', '首申', '复申', '复贷']
# extract channel list where except recalling channel
# extract the channel list, excluding recall channels
sql_channel = '''
SELECT DISTINCT(applied_from),applied_channel FROM risk_analysis
SELECT DISTINCT(applied_from),applied_channel
FROM risk_analysis
WHERE applied_from IN
(SELECT applied_from FROM risk_analysis
WHERE transacted = 1
AND real_loan_amount > 20000
AND loan_start_date >= DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -1 MONTH),'%Y-%m-01')
AND loan_start_date < DATE_FORMAT(NOW(),'%Y-%m-01')
and applied_from not in (159481,159486,159528)
GROUP BY 1
HAVING SUM(real_loan_amount) > 100000
ORDER BY sum(real_loan_amount) DESC)
'''
sql = '''
SELECT date_format(applied_at,'%Y-%m-%d') as applied_at,applied_from,applied_type,@feature FROM risk_analysis
WHERE DATE_FORMAT(applied_at,'%Y-%m') BETWEEN DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -2 MONTH),'%Y-%m')
@@ -50,8 +47,8 @@ AND applied = 1
'''
########################## DB Configuration #####################################
risk_analysis_config = {'user' : 'jiahua_wang',
'password' : 'IqHKCIyZ',
risk_analysis_config = {'user' : 'fengkong_read_only',
'password' : 'mT2HFUgI',
'host' : '172.20.6.9',
'port' : 9030,
'database' : 'risk_analysis',
@@ -67,7 +64,7 @@ now = time.strftime("%Y-%m-%d")
# make directory, if it exists return path, else return created folder path
#def mkdir(path,name):
# def mkdir(path,name):
# folder = os.path.exists(path+name)
# if folder:
# return path+name+'\\'
@@ -76,264 +73,282 @@ now = time.strftime("%Y-%m-%d")
# return path+name+'\\'
# VLM with one variable
def plotLine(title,y,row,col,table,save_path,upperBoundary=0,bottomBoundary=0):
def plotLine(title, y, row, col, table, save_path, upperBoundary=0, bottomBoundary=0):
# if fewer than 10 points or a nearly empty series, skip the plot
if len(col) <= 10 | int(y.sum()) <= 10 :
if (len(col) <= 10) or (int(y.sum()) <= 10):
return 1
else:
cols = [item[5:] for item in col.values.tolist()]
#print(content)
cols = [item[5:] for item in col.values.tolist()]
# print(content)
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['savefig.dpi'] = 226 # saved-image DPI (resolution)
fig,axs = plt.subplots(1,1,figsize=(33,11),linewidth=0.1)
x = range(len(col))
axs.plot(x,y)
axs.add_line(Line2D((x[0],x[-1]),(y.mean(),y.mean()),linestyle='--',color='darkorange'))
plt.annotate(s = '月均{}'.format(round(y.mean(),2)),xy=(x[-1] + 0.1,y.mean()))
plt.rcParams['savefig.dpi'] = 226  # saved-image DPI (resolution)
fig, axs = plt.subplots(1, 1, figsize=(33, 11), linewidth=0.1)
x = range(len(col))
axs.plot(x, y)
axs.add_line(Line2D((x[0], x[-1]), (y.mean(), y.mean()), linestyle='--', color='darkorange'))
plt.annotate(s='月均{}'.format(round(y.mean(), 2)), xy=(x[-1] + 0.1, y.mean()))
# upper boundary
if upperBoundary == 0:
axs.add_line(Line2D((x[0],x[-1]),(y.mean() + 3 * y.std(),y.mean() + 3 * y.std()),linestyle = '--',color = 'lightcoral'))
plt.annotate(s = 'Mean+3STD\n{}'.format(round(y.mean() + 3 * y.std(),2)),xy = (x[-1] + 0.1,y.mean() + 3 * y.std()))
axs.add_line(Line2D((x[0], x[-1]), (y.mean() + 3 * y.std(), y.mean() + 3 * y.std()), linestyle='--',
color='lightcoral'))
plt.annotate(s='Mean+3STD\n{}'.format(round(y.mean() + 3 * y.std(), 2)),
xy=(x[-1] + 0.1, y.mean() + 3 * y.std()))
else:
axs.add_line(Line2D((x[0],x[-1]),(upperBoundary,upperBoundary),linestyle = '--',color = 'lightcoral'))
plt.annotate(s = 'Mean+3STD\n{}'.format(round(upperBoundary,2)),xy = (x[-1] + 0.1,upperBoundary))
# bottom boundary
if bottomBoundary == 0:
axs.add_line(Line2D((x[0],x[-1]),(y.mean() - 3 * y.std(),y.mean() - 3 * y.std()),linestyle = '--',color = 'lightcoral'))
plt.annotate(s = 'Mean-3STD\n{}'.format(round(y.mean() - 3 * y.std(),2)),xy = (x[-1] + 0.1,y.mean() - 3 * y.std()))
axs.add_line(Line2D((x[0], x[-1]), (upperBoundary, upperBoundary), linestyle='--', color='lightcoral'))
plt.annotate(s='Mean+3STD\n{}'.format(round(upperBoundary, 2)), xy=(x[-1] + 0.1, upperBoundary))
# bottom boundary
if bottomBoundary == 0:
axs.add_line(Line2D((x[0], x[-1]), (y.mean() - 3 * y.std(), y.mean() - 3 * y.std()), linestyle='--',
color='lightcoral'))
plt.annotate(s='Mean-3STD\n{}'.format(round(y.mean() - 3 * y.std(), 2)),
xy=(x[-1] + 0.1, y.mean() - 3 * y.std()))
else:
print('gonna here')
axs.add_line(Line2D((x[0],x[-1]),(bottomBoundary,bottomBoundary),linestyle = '--',color = 'lightcoral'))
plt.annotate(s = 'Mean-3STD\n{}'.format(round(bottomBoundary,2)),xy = (x[-1] + 0.1,bottomBoundary))
# draw vertical line of each points
axs.add_line(Line2D((x[0], x[-1]), (bottomBoundary, bottomBoundary), linestyle='--', color='lightcoral'))
plt.annotate(s='Mean-3STD\n{}'.format(round(bottomBoundary, 2)), xy=(x[-1] + 0.1, bottomBoundary))
# draw vertical line of each points
bottom = 0
if y.min() - y.std() * 3 - y.mean() * 0.02 > 0:
bottom = y.min() - y.std() * 3 - y.std() * 0.1
plt.vlines(x,[bottom],y,color = 'lightgrey',linestyle = '--')
axs.grid()
if y.min() - y.std() * 3 - y.mean() * 0.02 > 0:
bottom = y.min() - y.std() * 3 - y.std() * 0.1
plt.vlines(x, [bottom], y, color='lightgrey', linestyle='--')
axs.grid()
plt.xticks([])
the_table = plt.table(cellText=table,
rowLabels=row,
colLabels=cols,
colWidths=[0.91 / (len(col) - 1)]*len(col),
loc='bottom')
rowLabels=row,
colLabels=cols,
colWidths=[0.91 / (len(col) - 1)] * len(col),
loc='bottom')
the_table.auto_set_font_size(False)
the_table.set_fontsize(9)
fig.subplots_adjust(left=0.032,right=0.97)
fig.set_size_inches(33,11)
#fig.suptitle(title)
plt.title(title,fontsize=18)
fig.subplots_adjust(left=0.032, right=0.97)
fig.set_size_inches(33, 11)
# fig.suptitle(title)
plt.title(title, fontsize=18)
plt.savefig(save_path + title + ".png")
plt.show()
return 1
def readExcel(path,sheet=None):
return pd.read_excel(path,sheet)
#conn = connect2DB()
def readExcel(path, sheet=None):
return pd.read_excel(path, sheet)
# conn = connect2DB()
dict_keylist = []
dict_vallist = []
dict_DD = readExcel("E:\\Python\\su Project\\DD.xlsx")
dict_DD = readExcel("E:\\Python\\su Project\\features_DD.xlsx")
modelList = [model for model in dict_DD.keys()]
def mkdir(path,fd):
if not os.path.exists(path+fd):
folder = mkdir(path,fd)
def mkdir(path, fd):
# create the directory if it is missing, then return its full path
if not os.path.exists(path + fd):
os.makedirs(path + fd)
return path + fd
def connect2DB(db_config):
db = pymysql.connect(
host = db_config['host'],
port = db_config['port'],
user = db_config['user'],
passwd = db_config['password'],
db = db_config['database'],
charset = db_config['encoding'])
host=db_config['host'],
port=db_config['port'],
user=db_config['user'],
passwd=db_config['password'],
db=db_config['database'],
charset=db_config['encoding'])
return db
def query_sql(sql,db_config=risk_analysis_config):
def query_sql(sql, db_config=risk_analysis_config):
try:
conn = connect2DB(db_config)
df = pd.read_sql(sql,conn)
conn.close()
df = pd.read_sql(sql, conn)
conn.close()
return df
except Exception as e:
return 0
def dataManipul(df,keyword):
#df_withoutna = df.dropna(axis=0).sort_values(by=keyword,ascending=False).reset_index().drop('index',axis=1)
#df = pd.merge(df_withoutna[keyword].iloc[int(len(df_withoutna)*0.01):int(len(df_withoutna)*0.99)]
#df.dropna(axis=0).loc[:,keyword] = df.dropna(axis=0)[keyword].map(lambda x : np.nan if x < 0 else x )
df_count = df[['applied_at',keyword]].groupby('applied_at').count()[keyword] # need 2 recheck
df_zeros = pd.Series(np.zeros(df_count.shape),index = df_count.index)
df_missing = df[df[keyword].isnull()].fillna(0).groupby('applied_at')[keyword].count()
df_missing = pd.concat([df_zeros,df_missing], axis = 1, sort = True).fillna(0)[keyword]
# df_shape = pd.DataFrame(np.zeros(df_count.shape))
#
# df_missing = df[df[keyword].isnull()].fillna(0).groupby('applied_at')[keyword].count()
# df_missing = df_shape + df_missing
return 0
def dataManipul(df, keyword):
# df_withoutna = df.dropna(axis=0).sort_values(by=keyword,ascending=False).reset_index().drop('index',axis=1)
# df = pd.merge(df_withoutna[keyword].iloc[int(len(df_withoutna)*0.01):int(len(df_withoutna)*0.99)]
# df.dropna(axis=0).loc[:,keyword] = df.dropna(axis=0)[keyword].map(lambda x : np.nan if x < 0 else x )
df_count = df[['applied_at', keyword]].groupby('applied_at').count()[keyword] # need 2 recheck
df_zeros = pd.Series(np.zeros(df_count.shape), index=df_count.index)
df_missing = df[df[keyword].isnull()].fillna(0).groupby('applied_at')[keyword].count()
df_missing = pd.concat([df_zeros, df_missing], axis=1, sort=True).fillna(0)[keyword]
# df_shape = pd.DataFrame(np.zeros(df_count.shape))
#
# df_missing = df[df[keyword].isnull()].fillna(0).groupby('applied_at')[keyword].count()
# df_missing = df_shape + df_missing
missing_rate = df_missing / (df_count + df_missing) * 100
del df_missing
df_zero = df[df[keyword] == 0].groupby('applied_at')[keyword].count()
df_zero = pd.concat([df_zeros,df_zero], axis = 1, sort = True).fillna(0)[keyword]
df_zero = pd.concat([df_zeros, df_zero], axis=1, sort=True).fillna(0)[keyword]
zero_rate = df_zero / df_count * 100
del df_zero
df_noneNA = df.dropna(axis = 0)
df_noneNA = df_noneNA.sort_values(by=keyword,ascending=False).reset_index().drop('index',axis=1)
df_sum = df_noneNA.iloc[int(len(df_noneNA)*0.01):int(len(df_noneNA)*0.99)].groupby('applied_at').agg(['mean','std','count'])
df_sum = pd.concat([df_zeros,df_sum], axis = 1, sort = True).fillna(0).drop(columns=[0])
df_sum.columns = ['mean','std','count']
df_noneNA = df.dropna(axis=0)
df_noneNA = df_noneNA.sort_values(by=keyword, ascending=False).reset_index().drop('index', axis=1)
df_sum = df_noneNA.iloc[int(len(df_noneNA) * 0.01):int(len(df_noneNA) * 0.99)].groupby('applied_at').agg(
['mean', 'std', 'count'])
df_sum = pd.concat([df_zeros, df_sum], axis=1, sort=True).fillna(0).drop(columns=[0])
df_sum.columns = ['mean', 'std', 'count']
cols = df_count.index
return zero_rate.fillna(0).round(1),missing_rate.fillna(0).round(1),cols,df_sum
return zero_rate.fillna(0).round(1), missing_rate.fillna(0).round(1), cols, df_sum
#########################################################################
# check via channel details
def separateBychannel(df,key,meansub3std,meanpls3std):
def separateBychannel(df, key, meansub3std, meanpls3std):
try:
for appliedFrom in kalist:
try:
#df.applied_from = df.applied_from.astype('str')
zero_rate_total,missing_rate_total,cols_total,df_sum = dataManipul(df[df.applied_from == appliedFrom][['applied_at',key]],key)
try:
# df.applied_from = df.applied_from.astype('str')
zero_rate_total, missing_rate_total, cols_total, df_sum = dataManipul(
df[df.applied_from == appliedFrom][['applied_at', key]], key)
table = []
y_total = df_sum['mean']
table.append(df_sum['mean'].round(1)) #.round(1).values.tolist()
table.append(df_sum['mean'].round(1)) # .round(1).values.tolist()
table.append(df_sum['count'].astype('int'))
table.append(missing_rate_total.astype('str')+'%')
table.append(zero_rate_total.astype('str')+'%')
if (y_total.iloc[-30:].max() > meanpls3std) | (y_total.iloc[-30:].min() < meansub3std):
plotLine(str(modelList[i])+'-'+description[fea_i]+'-Mean-'+appliedType_type[appliedType_index]+'with'+str(appliedFrom)+'-VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path_sepatate)
table.append(missing_rate_total.astype('str') + '%')
table.append(zero_rate_total.astype('str') + '%')
if (y_total.iloc[-30:].max() > meanpls3std) | (y_total.iloc[-30:].min() < meansub3std):
plotLine(str(modelList[i]) + '-' + description[fea_i] + '-Mean-' + appliedType_type[
appliedType_index] + 'with' + str(appliedFrom) + '-VLM', y_total,
['value', 'count', 'Missing Rate', 'Zero Rate'], cols_total, table, path_sepatate)
del table
except ValueError as e: #ValueError
except ValueError as e: # ValueError
continue
except Exception as e :
print('channel Exception : ',key,appliedType,e)
########### extract channel list #############
applied_channel = query_sql(sql_channel).applied_from.tolist()
sql = sql.replace('@applied_channel',str(applied_channel).strip('[').strip(']'))
except Exception as e:
print('channel Exception : ', key, appliedType, e)
########### extract channel list #############
applied_channel = query_sql(sql_channel).applied_from.tolist()
sql = sql.replace('@applied_channel', str(applied_channel).strip('[').strip(']'))
#########################################################################
#for model in modelList:
# df_model = dict_DD[model].dropna(axis = 0)
# for model in modelList:
# df_model = dict_DD[model].dropna(axis = 0)
# dict_keylist.append(df_model.feature.tolist())
# dict_keylist.append(df_model.query.tolist())
# dict_vallist.append(df_model.description.tolist())
#
#for li in dict_keylist:
#
# for li in dict_keylist:
for i in range(len(modelList)):
# drop colums from data dict where there has no description
df_model_list = dict_DD[modelList[i]].dropna(axis = 0)
#feature key list
df_model_list = dict_DD[modelList[i]].dropna(axis=0)
# feature key list
features = df_model_list.reset_index().feature
# query key list
queries = df_model_list.reset_index().queries
#feature descriptions list
# feature descriptions list
description = df_model_list.reset_index().description
# applied_from
# cv channel = df_model_list.reset_index().applied_type
modelVar_index = 0
for fea_i in range(len(features)):
appliedType_index = 0
try:
key = queries[fea_i].strip()
print('key: ',key)
df = query_sql(sql.replace('@feature',queries[fea_i]))
print('key: ', key)
df = query_sql(sql.replace('@feature', queries[fea_i]))
# map SQL NULLs and negative sentinel values to NaN
df.loc[:,key] = df.loc[:,key].map(lambda x : np.nan if x == None else x)
df.loc[:,key] = df.loc[:,key].map(lambda x : np.nan if x < 0 else x)
# a plain integer here means query_sql hit an exception and returned 0
df.loc[:, key] = df.loc[:, key].map(lambda x: np.nan if x is None else x)
df.loc[:, key] = df.loc[:, key].map(lambda x: np.nan if x < 0 else x)
# a plain integer here means query_sql hit an exception and returned 0
except Exception as a:
print(a)
continue
for appliedType in appliedTypeList:
print('appliedType',appliedType)
print('appliedType', appliedType)
if appliedType_index == 0:
df_tmp = df[['applied_at','applied_from',key]]
df_tmp = df[['applied_at', 'applied_from', key]]
else:
df_tmp = df[df.applied_type == int(appliedType)][['applied_at','applied_from',key]]
#print('appliedType: ',appliedType)
df_tmp = df[df.applied_type == int(appliedType)][['applied_at', 'applied_from', key]]
# print('appliedType: ',appliedType)
try:
#df.applied_from = df.applied_from.astype('str')
zero_rate_total,missing_rate_total,cols_total,df_sum = dataManipul(df_tmp[['applied_at',key]],key)
# df.applied_from = df.applied_from.astype('str')
zero_rate_total, missing_rate_total, cols_total, df_sum = dataManipul(df_tmp[['applied_at', key]], key)
table = []
y_total = df_sum['mean']
table.append(df_sum['mean'].round(1)) #.round(1).values.tolist()
table.append(df_sum['mean'].round(1)) # .round(1).values.tolist()
table.append(df_sum['count'].astype('int'))
table.append(missing_rate_total.astype('str')+'%')
table.append(zero_rate_total.astype('str')+'%')
table.append(missing_rate_total.astype('str') + '%')
table.append(zero_rate_total.astype('str') + '%')
meanpls3std = y_total.mean() + y_total.std() * 3
meansub3std = y_total.mean() - y_total.std() * 3
#mean_mean = y_total.mean()
if (y_total.iloc[-30:].max() > meanpls3std) | (y_total.iloc[-30:].min() < meansub3std):
plotLine(str(modelList[i])+'-'+description[fea_i]+'-Mean-'+appliedType_type[appliedType_index]+'-变化VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path_alarm)
separateBychannel(df_tmp,key,meansub3std,meanpls3std)
# mean_mean = y_total.mean()
if (y_total.iloc[-30:].max() > meanpls3std) | (y_total.iloc[-30:].min() < meansub3std):
plotLine(str(modelList[i]) + '-' + description[fea_i] + '-Mean-' + appliedType_type[
appliedType_index] + '-变化VLM', y_total, ['value', 'count', 'Missing Rate', 'Zero Rate'],
cols_total, table, path_alarm)
separateBychannel(df_tmp, key, meansub3std, meanpls3std)
else:
plotLine(str(modelList[i])+'-'+description[fea_i]+'-Mean-'+appliedType_type[appliedType_index]+'-变化VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path)
plotLine(str(modelList[i]) + '-' + description[fea_i] + '-Mean-' + appliedType_type[
appliedType_index] + '-变化VLM', y_total, ['value', 'count', 'Missing Rate', 'Zero Rate'],
cols_total, table, path)
del table
except Exception as e: #ValueError
print('Mean Exception : ',key,appliedType,e)
except Exception as e: # ValueError
print('Mean Exception : ', key, appliedType, e)
appliedType_index += 1
continue
try:
zero_rate_total,missing_rate_total,cols_total,df_sum = dataManipul(df_tmp[['applied_at',key]],key)
zero_rate_total, missing_rate_total, cols_total, df_sum = dataManipul(df_tmp[['applied_at', key]], key)
table = []
y_total = df_sum['std']
table.append(df_sum['std'].round(1))
table.append(df_sum['count'])
table.append(missing_rate_total.astype('str')+'%')
table.append(zero_rate_total.astype('str')+'%')
table.append(missing_rate_total.astype('str') + '%')
table.append(zero_rate_total.astype('str') + '%')
del df_sum
stdpls3std = y_total.mean() + y_total.std() * 3
stdsub3std = y_total.mean() - y_total.std() * 3
#std_mean = y_total.mean()
if (y_total.iloc[-30:-1].max() > stdpls3std) | (y_total.iloc[-30:-1].min() < stdsub3std):
plotLine(str(modelList[i])+'-'+description[fea_i]+'-Std-'+appliedType_type[appliedType_index]+'-变化VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path_alarm)
# std_mean = y_total.mean()
if (y_total.iloc[-30:-1].max() > stdpls3std) | (y_total.iloc[-30:-1].min() < stdsub3std):
plotLine(str(modelList[i]) + '-' + description[fea_i] + '-Std-' + appliedType_type[
appliedType_index] + '-变化VLM', y_total, ['value', 'count', 'Missing Rate', 'Zero Rate'],
cols_total, table, path_alarm)
else:
plotLine(str(modelList[i])+'-'+description[fea_i]+'-Std-'+appliedType_type[appliedType_index]+'-变化VLM',y_total,['value','count','Missing Rate','Zero Rate'],cols_total,table,path)
plotLine(str(modelList[i]) + '-' + description[fea_i] + '-Std-' + appliedType_type[
appliedType_index] + '-变化VLM', y_total, ['value', 'count', 'Missing Rate', 'Zero Rate'],
cols_total, table, path)
del table
except Exception as e:
print('Std Exception : ',e)
print('Std Exception : ', e)
appliedType_index += 1
continue
continue
appliedType_index += 1
......
Monitor_risk_analysis/PSI&VAL_riskanalysis.py
@@ -20,8 +20,6 @@ import sklearn.metrics
from django.db import transaction, DatabaseError
sql_bins = '''
SELECT @modelVar,transacted,IF(passdue_day>@passdueday,1,0) as overdue FROM risk_analysis
WHERE applied_at BETWEEN
@@ -40,18 +38,6 @@ AND !ISNULL(@modelVar)
AND @modelVar > 0
'''
"""
### sql_bins_360 = '''
SELECT @modelVar,transacted,IF(passdue_day > 15,1,0) as overdue
FROM risk_analysis
WHERE !ISNULL(@modelVar)
AND applied_at >= '2018-08-01' AND applied_at <= '2018-09-01'
AND applied_from IN (@channelID)
AND applied_type IN (@appliedType)
AND !ISNULL(@modelVar)
AND @modelVar > 0
'''
"""
sql_observation = '''
SELECT date_format(applied_at,'%Y-%m') as applied_at,@modelVar
@@ -64,8 +50,7 @@ AND applied_type IN (@appliedType)
AND !ISNULL(@modelVar)
'''
######## calculate with natural mon ###########
######## calculate with T-N mon ###########
sql_passdueday = '''
(SELECT order_no,'T-1' as applied_at,@modelVar,IF(passdue_day > @passdue_day,1,0) as overdue
@@ -90,8 +75,7 @@ AND applied_type IN (@appliedType)
AND transacted = 1)
'''
############ calculate with T-N mon #############
############ calculate with natural mon #############
"""
sql_passdueday = '''
SELECT date_format(loan_start_date,'%Y-%m') as applied_at,@modelVar,IF(passdue_day > @passdueday,1,0) as overdue
@@ -106,26 +90,26 @@ AND transacted = 1
'''
"""
passdue_day = 15
#AND applied_from IN (@channelID)
##################################### db config ###############################
risk_analysis_config = {'user' : 'jiahua_wang',
'password' : 'IqHKCIyZ',
risk_analysis_config = {'user' : 'fengkong_read_only',
'password' : 'mT2HFUgI',
'host' : '172.20.6.9',
'port' : 9030,
'database' : 'risk_analysis',
'encoding' : 'utf8'}
#################################################################################
path = "E:\\Python\\su Project\\plot\\PSI&VAL\\"
mapping_path = "E:\\Python\\su Project\\query_score.xlsx"
path = "../plot/PSI_VAL/"
mapping_path = "./query_score.xlsx"
mapping = pd.read_excel(mapping_path,sheet_name='score_risk_anlysis')
modelType = mapping.description.tolist()
modelList = mapping.feature.tolist()
modelList = mapping.score.tolist()
appliedTypeList = mapping.appliedType.tolist()
channelIDList = mapping.channel.tolist()
@@ -133,21 +117,8 @@ channelIDList = mapping.channel.tolist()
del mapping
#modelList = ['xinyan_xy_fstapply_point','xinyan_xy_reapply_point','xinyan_xy_reloan_point','reloan_v3_point','lxf_v2_point','v6_operator_score_raw','dhb_score','tongdun_score','shuchuang_phone_apply','pingan_markingCriterion','tencent_tencentAntiFraudScore','eleven_bei_score','ljj_old_score','ljj_model_trusty_score']
#modelList = ['xinyan_xy_reloan_point']
#modelType = ['新颜首申分','新颜复申分','新颜复贷分','复贷分','量信分','V6分','电话邦分','同盾分','数创多头','腾讯反欺诈分','十一贝分','量晶晶首贷分','量晶晶复贷分']
#modelType = ['新颜复贷']
#channelIDList = ['217,214,198,1,159481,158748,333,159384,149483,159479,159479,158764,158932,159457,159459,159519','217,214,198,1,159481','158748','333','159384','149483,159479,159479','158764,158932,159457,159459,159519']
#channel = ['全部渠道','内部','汽车之家','融360','平安高净值','平安非高净值','其他外部渠道']
#appliedTypeList = ['1,2,3','1','2','3']
#appliedTypeList = ['1']
#appliedType_type = ['总体','首申','复申','复贷']
appliedType_type = {'1,2,3':'总体','1':'首申','2':'复申','3':'复贷'}
#appliedType_type = ['首申']
passdueday = 15 # first overdue of more than N days (fstOverdue N+)
def connect2DB(db_config):
@@ -420,77 +391,6 @@ for modelVar in modelList:
except Exception as e: # ZeroDivisionError
print('val exception',e)
"""
#V5 333
modelVar = 'v5_filter_fraud_point_v5_without_zhima'
channelID = '333'
for appliedType in str(appliedTypeList[modelList.index(modelVar)]).split(';'):
print('appliedType',appliedType)
print('appliedTypeList[model_index]',appliedTypeList[modelList.index(modelVar)])
try:
df_bins = pd.read_sql(sql_bins.replace('@modelVar',modelVar).replace('@appliedType',appliedType).replace('@channelID',channelID).replace('@passdueday',str(passdueday)),conn).dropna(axis=0)
df_observation = pd.read_sql(sql_observation.replace('@modelVar',modelVar).replace('@appliedType',appliedType).replace('@channelID',channelID),conn)
df_observation.loc[:,modelVar] = df_observation.loc[:,modelVar].map(lambda x : np.nan if x < 0 else x)
#df_bins = df_bins.apply(lambda x :np.nan if x < 0 else x)
Nothing,interval = pd.qcut(df_bins.loc[:,modelVar],10,retbins=True,precision=6,duplicates='drop')
interval[0] = 0
del Nothing
BM_count = psi_bins(df_bins,modelVar,interval)
zero_rate,missing_rate,dateList,cols,y,count = dataManipul(df_observation,modelVar,np.array(interval).round(6))
#df_observation_with_bin = pd.cut(df_observation.dropna(axis=0)[modelVar],interval)
# del df_bins
del interval
value_tab = []
rows = []
y_list = []
psi = []
# plot line separated by mon
for mon in dateList:
y_list.append(y.loc[mon].values)
value_tab.append(y.loc[mon].astype('str')+'%')
value_tab.append(count.loc[mon].astype('str')+'(zeroR:'+zero_rate.loc[mon].astype('str')+'%)')
rows.append(str(mon)+' Value');rows.append(str(mon)+' Count')
#(y-10).sum() / np.log10(y/10)
psi.append((((y.loc[mon]-BM_count) * np.log10(y.loc[mon]/BM_count)).sum()/100).round(3))
plotPSI(modelType[modelList.index(modelVar)]+'-'+appliedType_type[appliedType]+'-' + channel[channelID] + ' PSI',y_list,dateList,psi,missing_rate,rows,cols,value_tab,path)
except Exception as e:
print(e)
try:
# Overdue dataframe
df_bins_auc = df_bins[df_bins.transacted == 1]
del df_bins
auc_BM = sklearn.metrics.roc_auc_score(df_bins_auc.overdue, df_bins_auc.loc[:,modelVar])
print('AUC_BM: ',auc_BM)
Nothing,interval = pd.qcut(df_bins_auc.loc[:,modelVar],10,retbins=True,precision=6,duplicates='drop')
interval[0] = 0
del Nothing
df_passdueday = pd.read_sql(sql_passdueday.replace('@modelVar',modelVar).replace('@appliedType',appliedType).replace('@channelID',channelID).replace('@passdueday',str(passdueday)),conn)
count,df_overdue,y,dateList,cols = liftchart(df_passdueday,modelVar,np.array(interval).round(6))
value_tab = []
rows = []
y_list = []
aucri = []
auc = []
for mon in dateList:
y_list.append(y.loc[mon].values)
value_tab.append(y.loc[mon].astype('str')+'%')
value_tab.append(df_overdue.loc[mon].astype('str') + ' (总计 ' + count.loc[mon].astype('str') + ')' )
rows.append(str(mon)+' OverdueRate');rows.append(str(mon)+' Count')
aucri.append(round((sklearn.metrics.roc_auc_score(df_passdueday[df_passdueday.applied_at==mon].overdue, df_passdueday[df_passdueday.applied_at==mon].loc[:,modelVar])/auc_BM),3))
auc.append(round(sklearn.metrics.roc_auc_score(df_passdueday[df_passdueday.applied_at==mon].overdue, df_passdueday[df_passdueday.applied_at==mon].loc[:,modelVar]),3))
auc[-1] = str(auc[-1]) + '\n AUC基准: ' + str(round(auc_BM,3))
plotLiftChart(modelType[modelList.index(modelVar)] + '-' + appliedType_type[appliedType] + '-' + channel[channelID] + ' AUC WITH '+ str(passdueday) + '+',y_list,dateList,aucri,auc,rows,cols,value_tab,path)
except Exception as e:
print(e)
"""
def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
tab_df=None, plot_tab=True, saved_path=None):
......
query_score.xlsx (no preview for this file type)