Commit ba959fdd authored by 舒皓月's avatar 舒皓月

nothing

parent 52b89d68
......@@ -2,10 +2,10 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="1ecd0b9f-60aa-441d-b8e6-0ca91e7a02ef" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/model_monitor.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/model_monitor.iml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/Monitor_mongoDB/psi_from_mongodb.py" beforeDir="false" afterPath="$PROJECT_DIR$/Monitor_mongoDB/psi_from_mongodb.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py" beforeDir="false" afterPath="$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py" beforeDir="false" afterPath="$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
......@@ -15,46 +15,68 @@
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-12276">
<caret line="103" selection-start-line="103" selection-end-line="103" />
<state relative-caret-position="5220">
<caret line="311" column="58" selection-start-line="311" selection-start-column="53" selection-end-line="311" selection-end-column="58" />
<folding>
<element signature="e#89#100#0" expanded="true" />
<element signature="e#2551#2626#0" />
<element signature="e#2929#5687#0" />
<element signature="e#5727#5760#0" />
<element signature="e#5950#6075#0" />
<element signature="e#6109#6351#0" />
<element signature="e#6410#6555#0" />
<element signature="e#6592#8303#0" />
<element signature="e#8470#9706#0" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-577">
<caret line="253" column="37" selection-start-line="253" selection-start-column="28" selection-end-line="253" selection-end-column="37" />
<state relative-caret-position="828">
<caret line="35" column="25" selection-start-line="35" selection-start-column="25" selection-end-line="35" selection-end-column="25" />
<folding>
<element signature="e#89#100#0" expanded="true" />
<element signature="e#898#1320#0" />
<element signature="e#1328#1660#0" />
<element signature="e#1767#2032#0" />
<element signature="e#2717#5475#0" />
<element signature="e#5515#5548#0" />
<element signature="e#5748#5873#0" />
<element signature="e#5907#6149#0" />
<element signature="e#6208#6368#0" />
<element signature="e#6405#8116#0" />
<element signature="e#8283#9519#0" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="576">
<caret line="16" selection-start-line="16" selection-end-line="16" />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>E:</find>
<find>E:\</find>
<find>query_sql</find>
<find>sql_channel</find>
<find>liftchart</find>
<find>plotPSI</find>
<find>dateList</find>
<find>modelList</find>
<find>path</find>
<find>alarm</find>
</findStrings>
</component>
<component name="Git.Settings">
......@@ -63,62 +85,30 @@
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py" />
<option value="$PROJECT_DIR$/Monitor_mongoDB/psi_from_mongodb.py" />
<option value="$PROJECT_DIR$/Monitor_mongoDB/monitoring_VLM_mongodb.py" />
<option value="$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py" />
</list>
</option>
</component>
<component name="ProjectConfigurationFiles">
<option name="files">
<list>
<option value="$PROJECT_DIR$/.idea/model_monitor.iml" />
<option value="$PROJECT_DIR$/.idea/vcs.xml" />
<option value="$PROJECT_DIR$/.idea/misc.xml" />
<option value="$PROJECT_DIR$/.idea/modules.xml" />
<option value="$PROJECT_DIR$/Monitor_mongoDB/psi_from_mongodb.py" />
<option value="$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py" />
<option value="$PROJECT_DIR$/test.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="311" />
<option name="y" value="136" />
<option name="width" value="1229" />
<option name="x" value="310" />
<option name="y" value="135" />
<option name="width" value="1230" />
<option name="height" value="675" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="model_monitor" type="b2602c69:ProjectViewProjectNode" />
<item name="model_monitor" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="model_monitor" type="b2602c69:ProjectViewProjectNode" />
<item name="model_monitor" type="462c0819:PsiDirectoryNode" />
<item name="Monitor_mongoDB" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="model_monitor" type="b2602c69:ProjectViewProjectNode" />
<item name="model_monitor" type="462c0819:PsiDirectoryNode" />
<item name="Monitor_risk_analysis" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
</panes>
<panes />
</component>
<component name="PropertiesComponent">
<property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="last_opened_file_path" value="D:/work_space/test" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RunDashboard">
......@@ -133,6 +123,56 @@
</list>
</option>
</component>
<component name="RunManager" selected="Python.test">
<configuration name="Monitor_VLM_riskanalysi" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="model_monitor" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/Monitor_risk_analysis" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="test" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="model_monitor" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/test.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.test" />
<item itemvalue="Python.Monitor_VLM_riskanalysi" />
</list>
</recent_temporary>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
......@@ -148,14 +188,13 @@
</component>
<component name="ToolWindowManager">
<frame x="-7" y="-7" width="1550" height="838" extended-state="6" />
<editor active="true" />
<layout>
<window_info content_ui="combo" id="Project" order="0" weight="0.15587847" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" />
<window_info anchor="bottom" id="Run" order="2" weight="0.6090652" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
......@@ -172,8 +211,8 @@
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/Monitor_mongoDB/psi_from_mongodb.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="238">
<caret line="14" column="33" selection-start-line="14" selection-start-column="33" selection-end-line="14" selection-end-column="33" />
<state relative-caret-position="-12858">
<caret line="15" column="33" selection-start-line="15" selection-start-column="33" selection-end-line="15" selection-end-column="33" />
<folding>
<element signature="e#50#79#0" expanded="true" />
</folding>
......@@ -182,37 +221,45 @@
</entry>
<entry file="file://$PROJECT_DIR$/Monitor_mongoDB/monitoring_VLM_mongodb.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="374">
<state relative-caret-position="-1014">
<caret line="23" column="44" selection-start-line="23" selection-start-column="44" selection-end-line="23" selection-end-column="44" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-12276">
<caret line="103" selection-start-line="103" selection-end-line="103" />
<state relative-caret-position="5220">
<caret line="311" column="58" selection-start-line="311" selection-start-column="53" selection-end-line="311" selection-end-column="58" />
<folding>
<element signature="e#89#100#0" expanded="true" />
<element signature="e#2551#2626#0" />
<element signature="e#2929#5687#0" />
<element signature="e#5727#5760#0" />
<element signature="e#5950#6075#0" />
<element signature="e#6109#6351#0" />
<element signature="e#6410#6555#0" />
<element signature="e#6592#8303#0" />
<element signature="e#8470#9706#0" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/Monitor_VLM_riskanalysi.py">
<entry file="file://$PROJECT_DIR$/Monitor_risk_analysis/PSI&amp;VAL_riskanalysis.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-577">
<caret line="253" column="37" selection-start-line="253" selection-start-column="28" selection-end-line="253" selection-end-column="37" />
<state relative-caret-position="828">
<caret line="35" column="25" selection-start-line="35" selection-start-column="25" selection-end-line="35" selection-end-column="25" />
<folding>
<element signature="e#89#100#0" expanded="true" />
<element signature="e#898#1320#0" />
<element signature="e#1328#1660#0" />
<element signature="e#1767#2032#0" />
<element signature="e#2717#5475#0" />
<element signature="e#5515#5548#0" />
<element signature="e#5748#5873#0" />
<element signature="e#5907#6149#0" />
<element signature="e#6208#6368#0" />
<element signature="e#6405#8116#0" />
<element signature="e#8283#9519#0" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="576">
<caret line="16" selection-start-line="16" selection-end-line="16" />
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
......@@ -13,7 +13,7 @@ import matplotlib.pyplot as plt
import sklearn.metrics
# read mongodb mapping from excel
mapping_score = pd.read_excel("./query_score.xlsx",sheet_name='score_mongo').dropna(axis=0)
mapping_score = pd.read_excel("../query_score.xlsx",sheet_name='score_mongo').dropna(axis=0)
#mapping_variable = pd.read_excel("./mongodb.xlsx",sheet_name='variable').dropna(axis=0)
limit = "{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
......
......@@ -53,12 +53,18 @@ risk_analysis_config = {'user': 'fengkong_read_only',
'port': 9030,
'database': 'risk_analysis',
'encoding': 'utf8'}
# risk_analysis_config = {'user': 'haoyue_shu',
# 'password': '0SMd1rNQ',
# 'host': '172.20.6.10',
# 'port': 9030,
# 'database': 'risk_analysis',
# 'encoding': 'utf8'}
#################################################################################
pwd = os.getcwd()
path = "E:\\Python\\su Project\\plot\\VLM\\"
path_alarm = "E:\\Python\\su Project\\plot\\VLM\\alarm\\"
path_sepatate = "E:\\Python\\su Project\\plot\\separateByChannel\\"
path = "../plot/VLM/"
path_alarm = "../plot/VLM/alarm/"
path_sepatate = "../plot/separateByChannel/"
now = time.strftime("%Y-%m-%d")
......@@ -140,7 +146,7 @@ def readExcel(path, sheet=None):
# conn = connect2DB()
dict_keylist = []
dict_vallist = []
dict_DD = readExcel("E:\\Python\\su Project\\features_DD.xlsx")
dict_DD = readExcel("../features_risk_analysis.xlsx")
modelList = [model for model in dict_DD.keys()]
......@@ -169,7 +175,7 @@ def query_sql(sql, db_config=risk_analysis_config):
df = pd.read_sql(sql, conn)
conn.close()
return df
except Exception as e:
except:
return 0
......
......@@ -17,28 +17,30 @@ from matplotlib.font_manager import FontProperties
from matplotlib.lines import Line2D
import datetime
import sklearn.metrics
from django.db import transaction, DatabaseError
# from django.db import transaction, DatabaseError
sql_bins = '''
SELECT @modelVar,transacted,IF(passdue_day>@passdueday,1,0) as overdue FROM risk_analysis
WHERE applied_at BETWEEN
(SELECT date_format(applied_at,'%Y-%m-%d')
FROM risk_analysis
WHERE !ISNULL(@modelVar) AND transacted=1 and applied_from IN (@channelID)
ORDER BY applied_at asc
LIMIT 1) AND DATE_ADD((SELECT date_format(applied_at,'%Y-%m-%d')
LIMIT 1)
AND DATE_ADD((SELECT date_format(applied_at,'%Y-%m-%d')
FROM risk_analysis
WHERE !ISNULL(@modelVar) AND transacted=1 and applied_from IN (@channelID)
ORDER BY applied_at asc
LIMIT 1),INTERVAL 30 DAY)
AND applied_from IN (@channelID)
AND applied_type IN (@appliedType)
AND !ISNULL(@modelVar)
AND @modelVar > 0
'''
sql_observation = '''
SELECT date_format(applied_at,'%Y-%m') as applied_at,@modelVar
FROM risk_analysis
......@@ -92,120 +94,126 @@ AND transacted = 1
passdue_day = 15
#AND applied_from IN (@channelID)
# AND applied_from IN (@channelID)
##################################### db config ###############################
risk_analysis_config = {'user' : 'fengkong_read_only',
'password' : 'mT2HFUgI',
'host' : '172.20.6.9',
'port' : 9030,
'database' : 'risk_analysis',
'encoding' : 'utf8'}
risk_analysis_config = {'user': 'fengkong_read_only',
'password': 'mT2HFUgI',
'host': '172.20.6.9',
'port': 9030,
'database': 'risk_analysis',
'encoding': 'utf8'}
#################################################################################
path = "../plot/PSI_VAL/"
mapping_path = "./query_score.xlsx"
mapping_path = "../query_score.xlsx"
mapping = pd.read_excel(mapping_path,sheet_name='score_risk_anlysis')
mapping = pd.read_excel(mapping_path, sheet_name='score_risk_anlysis')
modelType = mapping.description.tolist()
modelList = mapping.score.tolist()
appliedTypeList = mapping.appliedType.tolist()
channelIDList = mapping.channel.tolist()
#modelBound_dict = mapping[['feature','boundary']].set_index('feature').boundary.to_dict()
# modelBound_dict = mapping[['feature','boundary']].set_index('feature').boundary.to_dict()
del mapping
appliedType_type = {'1,2,3':'总体','1':'首申','2':'复申','3':'复贷'}
appliedType_type = {'1,2,3': '总体', '1': '首申', '2': '复申', '3': '复贷'}
passdueday = 15 # more than N days (fstOverdue N+)
passdueday = 15 #more than N days (fstOverdue N+)
def connect2DB(db_config):
    """Open a MySQL connection described by ``db_config``.

    Parameters
    ----------
    db_config : dict
        Must provide the keys 'host', 'port', 'user', 'password',
        'database' and 'encoding'.

    Returns
    -------
    The connection object returned by ``pymysql.connect``. The caller is
    responsible for closing it.
    """
    # Translate the project's config keys into pymysql keyword arguments.
    connect_kwargs = {
        'host': db_config['host'],
        'port': db_config['port'],
        'user': db_config['user'],
        'passwd': db_config['password'],
        'db': db_config['database'],
        'charset': db_config['encoding'],
    }
    return pymysql.connect(**connect_kwargs)
def query_sql(sql, db_config=risk_analysis_config):
    """Run ``sql`` against the configured database and return a DataFrame.

    Parameters
    ----------
    sql : str
        Query text passed unchanged to ``pd.read_sql``.
    db_config : dict, optional
        Connection settings understood by ``connect2DB``; defaults to the
        module-level ``risk_analysis_config``.

    Returns
    -------
    pandas.DataFrame on success, or the integer ``0`` if connecting or
    querying raised. The ``0`` sentinel is kept for backward compatibility
    with existing callers.
    """
    try:
        conn = connect2DB(db_config)
        try:
            return pd.read_sql(sql, conn)
        finally:
            # Always release the connection, even when the query itself fails.
            conn.close()
    except Exception as e:
        # Report the failure instead of hiding it; callers still receive 0.
        print('query_sql exception', e)
        return 0
################################### plot PSI ##################################
# +'\nmissing:'+str(missing[int(i/2)])+'%'
def plotPSI(title, y_list, dateList, psi, missing, rows, cols, table_value, save_path):
    """Draw one line per entry of ``y_list`` with a table underneath and save it.

    Parameters
    ----------
    title : str
        Figure title; also used as the PNG file name (``save_path + title + ".png"``).
    y_list : sequence
        One sequence of y-values per plotted line.
    dateList : sequence of str
        Per-line labels; only the first 7 characters of each are shown.
    psi, missing : indexable
        Per-line values shown in the legend, read by position ``y_index``.
        NOTE(review): the visible caller passes a pandas Series for
        ``missing`` - confirm positional access is what is intended.
    rows, cols, table_value :
        Row labels, column labels and cell text of the table under the plot.
    save_path : str
        Prefix (normally a directory ending in a separator) for the PNG.

    Returns
    -------
    int
        Always 1.

    Raises
    ------
    ZeroDivisionError
        When ``cols`` has exactly one element (column width computation).
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.rcParams['savefig.dpi'] = 226  # DPI of the saved image
    plt.rcParams['figure.dpi'] = 100  # on-screen resolution
    fig, axs = plt.subplots(1, 1, figsize=(16, 9), linewidth=0.1)
    try:
        for y_index, y in enumerate(y_list):
            x = range(len(y))
            axs.plot(x, y, marker='o',
                     label=dateList[y_index][0:7] + ' PSI:' + str(psi[y_index])
                     + '\n缺失率:' + str(missing[y_index]) + '%')

        the_table = plt.table(cellText=table_value,
                              rowLabels=rows,
                              colLabels=cols,
                              colWidths=[0.91 / (len(cols) - 1)] * len(cols),
                              loc='bottom')
        the_table.auto_set_font_size(False)
        the_table.set_fontsize(8)
        # Leave room at the bottom of the figure for the table.
        fig.subplots_adjust(bottom=0.2)
        plt.grid()
        plt.ylabel('各分段样本占比' + ' (%)')
        plt.legend()
        plt.xticks([])
        # plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--')
        fig.suptitle(title)
        plt.savefig(save_path + title + ".png")
        plt.show()
    finally:
        # This function is called once per chart in a loop; release the
        # figure so open figures do not accumulate.
        plt.close(fig)
    return 1
########################### validation liftchart###############################
def plotLiftChart(title, y_list, dateList, aucri, auc, rows, cols, table_value, save_path):
    """Draw one lift-chart line per entry of ``y_list`` with a table and save it.

    Parameters
    ----------
    title : str
        Figure title; also used as the PNG file name (``save_path + title + ".png"``).
    y_list : sequence
        One sequence of y-values per plotted line.
    dateList : sequence of str
        Per-line labels; only the first 7 characters of each are shown.
    aucri, auc : indexable
        Per-line values shown in the legend, read by position ``y_index``.
    rows, cols, table_value :
        Row labels, column labels and cell text of the table under the plot.
    save_path : str
        Prefix (normally a directory ending in a separator) for the PNG.

    Returns
    -------
    int
        Always 1.

    Raises
    ------
    ZeroDivisionError
        When ``cols`` has exactly one element (column width computation).

    Notes
    -----
    The y-axis label reads the module-level ``passdueday``.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.rcParams['savefig.dpi'] = 226  # DPI of the saved image
    plt.rcParams['figure.dpi'] = 100  # on-screen resolution
    fig, axs = plt.subplots(1, 1, figsize=(16, 9), linewidth=0.1)
    try:
        for y_index, y in enumerate(y_list):
            x = range(len(y))
            axs.plot(x, y, marker='o',
                     label=dateList[y_index][0:7] + ' (AUCRI:' + str(aucri[y_index])
                     + ') AUC: ' + str(auc[y_index]))

        the_table = plt.table(cellText=table_value,
                              rowLabels=rows,
                              colLabels=cols,
                              colWidths=[0.91 / (len(cols) - 1)] * len(cols),
                              loc='bottom')
        the_table.auto_set_font_size(False)
        the_table.set_fontsize(8)
        # Leave room at the bottom of the figure for the table.
        fig.subplots_adjust(bottom=0.2)
        plt.legend()
        plt.grid()
        plt.ylabel('贷后首逾' + str(passdueday) + '+ (%)')
        plt.xticks([])
        fig.suptitle(title)
        plt.savefig(save_path + title + ".png")
        plt.show()
    finally:
        # This function is called once per chart in a loop; release the
        # figure so open figures do not accumulate.
        plt.close(fig)
    return 1
###############################################################################
#def dataManipul(df,keyword,interval):
# def dataManipul(df,keyword,interval):
#
# # df count of all records
#
......@@ -231,66 +239,69 @@ def plotLiftChart(title,y_list,dateList,aucri,auc,rows,cols,table_value,save_pat
# cols = df_count.index
# return zero_rate,missing_rate,cols,df_sum
def dataManipul(df, keyword, interval):
    """Compute per-month bin distribution, zero rate and missing rate of a score.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'applied_at' and ``keyword``. It is not
        modified; the function works on a copy.
    keyword : str
        Name of the score column.
    interval : sequence of bin edges
        Passed to ``pd.cut`` to bucket the score.

    Returns
    -------
    tuple
        ``(zero_rate, missing_rate, rows, cols, y, count)`` where
        ``zero_rate`` is the percentage of exact-zero scores per
        (applied_at, bin) rounded to 1 decimal,
        ``missing_rate`` is the percentage of rows per applied_at that are
        not usable (NaN or negative) rounded to 1 decimal,
        ``rows`` is the list of first-level index values of ``y``,
        ``cols`` is the bin labels as strings,
        ``y`` is the percentage of valid rows falling in each
        (applied_at, bin) rounded to 1 decimal, and
        ``count`` is the number of valid rows per (applied_at, bin).
    """
    # Set negative scores to null. The assignment is done on a private copy:
    # assigning into ``df.dropna(...)`` would only touch a temporary frame
    # and silently have no effect.
    data = df.copy()
    data[keyword] = data[keyword].map(
        lambda x: np.nan if (pd.notnull(x) and x < 0) else x)
    df_noneNA = data.dropna(axis=0).copy()

    # Rows per month (fillna(0) makes count() include the null scores).
    df_count = data[['applied_at', keyword]].fillna(0).groupby('applied_at').count()
    valid_count = df_noneNA[['applied_at', keyword]].groupby('applied_at').count()
    # fill_value=0: a month without any valid row has 0 valid rows, so all of
    # its rows are missing (rather than the difference becoming NaN).
    df_missing = df_count[keyword].sub(valid_count[keyword], fill_value=0)
    missing_rate = df_missing / df_count[keyword].replace(0, 1) * 100

    df_noneNA['bins'] = pd.cut(df_noneNA[keyword], interval, precision=6)
    cols = df_noneNA['bins'].value_counts().sort_index().index.astype('str')

    df_count = df_noneNA[['applied_at', 'bins', keyword]].groupby(['applied_at', 'bins']).count()
    # Zero-filled series on the full (month, bin) index so that groups without
    # any zero score still appear after the concat below.
    df_zeros = pd.Series(np.zeros(df_count[keyword].shape), index=df_count.index)
    df_zero = df_noneNA[df_noneNA[keyword] == 0][['applied_at', 'bins', keyword]].groupby(
        ['applied_at', 'bins']).count()
    df_zero = pd.concat([df_zeros, df_zero], axis=1)[keyword].fillna(0)
    # replace(0, 1) avoids dividing by zero for empty groups.
    zero_rate = df_zero / df_count[keyword].replace(0, 1) * 100

    y = df_count / df_noneNA[['applied_at', keyword]].groupby('applied_at').count() * 100
    rows = y.index.levels[0].tolist()
    return zero_rate.round(1), missing_rate.round(1), rows, cols, y[keyword].round(1), df_count[keyword]
def psi_bins(df, keyword, interval):
    """Return the percentage of rows of ``df`` falling into each score bin.

    The bin of every row (``pd.cut`` of ``df[keyword]`` over ``interval``) is
    written into ``df`` as the column 'bins', so the caller's frame is
    modified. The result is a Series indexed by bin whose values are the
    share of non-null ``keyword`` values in that bin, in percent.
    """
    df.loc[:, 'bins'] = pd.cut(df[keyword], interval, precision=6)
    per_bin = df.groupby('bins').count()[keyword]
    total = per_bin.values.sum()
    return per_bin / total * 100
# draw liftchart
def liftchart(df, keyword, interval):
    """Compute the per-month, per-bin overdue rate of a score.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'applied_at', 'overdue' and ``keyword``. A 'bins'
        column is written into it, so the caller's frame is modified.
    keyword : str
        Name of the score column.
    interval : numpy.ndarray
        Bin edges. When any score is negative, an extra edge at -10000000 is
        added so that negative scores get a bin of their own.

    Returns
    -------
    tuple
        ``(count, overdue, rate, rows, cols)``: sample count and overdue
        count per (applied_at, bin), overdue rate in percent rounded to 3
        decimals, the list of first-level index values of the rate, and the
        bin labels as strings.
    """
    group_keys = ['applied_at', 'bins']
    selected = group_keys + ['overdue']

    # split bins with scores
    # nothing,interval = pd.qcut(df[df.loc[:,keyword]>0][keyword],10,retbins=True,duplicates='drop')
    # delete 'nothing' var cause its useless
    if (df.loc[:, keyword] < 0).any():
        bins_interval = sorted(interval.tolist() + [-10000000])
    else:
        bins_interval = interval
    df.loc[:, 'bins'] = pd.cut(df[keyword], bins_interval, precision=6)

    # count of sample
    df_count = df[selected].groupby(group_keys).count()
    # Zero-filled series on the full index so groups without overdue rows
    # survive the concat below.
    df_zeros = pd.Series(np.zeros(df_count['overdue'].shape), index=df_count.index)

    # overdue samples
    overdue_rows = df[df.overdue == 1]
    # df.loc[:,'bins'] = pd.cut(df[keyword],interval)
    df_overdue = overdue_rows[selected].groupby(group_keys).count()
    df_overdue = pd.concat([df_zeros, df_overdue], axis=1)['overdue'].fillna(0)

    # replace(0, 1) avoids dividing by zero for empty groups.
    y = df_overdue / df_count['overdue'].replace(0, 1) * 100
    rows = y.index.levels[0].tolist()
    cols = overdue_rows['bins'].value_counts().sort_index().index.astype('str').tolist()
    return df_count['overdue'], df_overdue, y.round(3), rows, cols
# extract channel list where except recalling channel
......@@ -303,42 +314,45 @@ AND loan_start_date < DATE_FORMAT(NOW(),'%Y-%m-01')
and applied_from not in (159481,159486,159528)
'''
# Map of comma-separated applied_from ids -> display name of the channel group.
channel = {'1,214,217,198': '内部', '159507': '浅橙', '159537': '360金融', '333': '融360', '159384,159483': '平安',
           '159561': '51公积金API'}
channelId = query_sql(sql_channel).applied_from

# Flatten the ids already assigned to a named group. The keys are parsed
# explicitly with int() rather than by eval()-ing a concatenated string.
l = []
for i in channel.keys():
    l.extend(int(one_id) for one_id in i.split(','))

# Every id returned by the query that belongs to no named group.
channel[str(channelId[channelId.map(lambda x: x not in l)].tolist()).strip('[').strip(']')] = '其他渠道'
# All ids returned by the query.
channel[str(channelId.tolist()).strip('[').strip(']')] = '全部渠道'
# traverse each model & applied_type & channelbins_interval
for modelVar in modelList:
print('model: ',modelVar)
print('model: ', modelVar)
for appliedType in str(appliedTypeList[modelList.index(modelVar)]).split(';'):
# print('appliedType',appliedType)
# print('appliedTypeList[model_index]',appliedTypeList[modelList.index(modelVar)])
# print('appliedType',appliedType)
# print('appliedTypeList[model_index]',appliedTypeList[modelList.index(modelVar)])
for channelID in channel.keys():
try:
print('channelID:',channelID)
print('channelID:', channelID)
df_bins = query_sql(sql_bins.replace('@modelVar',modelVar).replace('@appliedType',appliedType).replace('@channelID',channelID).replace('@passdueday',str(passdueday))).dropna(axis=0)
df_bins = query_sql(
sql_bins.replace('@modelVar', modelVar).replace('@appliedType', appliedType).replace('@channelID',
channelID).replace(
'@passdueday', str(passdueday))).dropna(axis=0)
df_observation = query_sql(sql_observation.replace('@modelVar',modelVar).replace('@appliedType',appliedType).replace('@channelID',channelID))
df_observation = query_sql(
sql_observation.replace('@modelVar', modelVar).replace('@appliedType', appliedType).replace(
'@channelID', channelID))
df_observation.loc[:,modelVar] = df_observation.loc[:,modelVar].map(lambda x : np.nan if x < 0 else x)
#df_bins = df_bins.apply(lambda x :np.nan if x < 0 else x)
Nothing,interval = pd.qcut(df_bins.loc[:,modelVar],10,retbins=True,precision=6,duplicates='drop')
df_observation.loc[:, modelVar] = df_observation.loc[:, modelVar].map(lambda x: np.nan if x < 0 else x)
# df_bins = df_bins.apply(lambda x :np.nan if x < 0 else x)
Nothing, interval = pd.qcut(df_bins.loc[:, modelVar], 10, retbins=True, precision=6, duplicates='drop')
interval[0] = 0
del Nothing
BM_count = psi_bins(df_bins,modelVar,interval)
zero_rate,missing_rate,dateList,cols,y,count = dataManipul(df_observation,modelVar,np.array(interval).round(6))
#df_observation_with_bin = pd.cut(df_observation.dropna(axis=0)[modelVar],interval)
BM_count = psi_bins(df_bins, modelVar, interval)
zero_rate, missing_rate, dateList, cols, y, count = dataManipul(df_observation, modelVar,
np.array(interval).round(6))
# df_observation_with_bin = pd.cut(df_observation.dropna(axis=0)[modelVar],interval)
# del df_bins
del interval
value_tab = []
......@@ -348,28 +362,32 @@ for modelVar in modelList:
# plot line separated by mon
for mon in dateList:
y_list.append(y.loc[mon].values)
#value_tab.append(y.loc[mon].astype('str')+'%')
value_tab.append(count.loc[mon].astype('str')+'(zeroR:'+zero_rate.loc[mon].astype('str')+'%)')
#rows.append(str(mon)+' Value');
rows.append(str(mon)+' Count')
#(y-10).sum() / np.log10(y/10)
psi.append((((y.loc[mon]-BM_count) * np.log10(y.loc[mon]/BM_count)).sum()/100).round(3))
plotPSI(modelType[modelList.index(modelVar)]+'-'+appliedType_type[appliedType]+'-' + channel[channelID] + ' PSI',y_list,dateList,psi,missing_rate,rows,cols,value_tab,path)
# value_tab.append(y.loc[mon].astype('str')+'%')
value_tab.append(count.loc[mon].astype('str') + '(zeroR:' + zero_rate.loc[mon].astype('str') + '%)')
# rows.append(str(mon)+' Value');
rows.append(str(mon) + ' Count')
# (y-10).sum() / np.log10(y/10)
psi.append((((y.loc[mon] - BM_count) * np.log10(y.loc[mon] / BM_count)).sum() / 100).round(3))
plotPSI(modelType[modelList.index(modelVar)] + '-' + appliedType_type[appliedType] + '-' + channel[
channelID] + ' PSI', y_list, dateList, psi, missing_rate, rows, cols, value_tab, path)
except Exception as e:
print('psi exception',e)
print('psi exception', e)
try:
# Overdue dataframe
df_bins_auc = df_bins[df_bins.transacted == 1]
del df_bins
auc_BM = sklearn.metrics.roc_auc_score(df_bins_auc.overdue, df_bins_auc.loc[:,modelVar])
print('AUC_BM: ',auc_BM)
Nothing,interval = pd.qcut(df_bins_auc.loc[:,modelVar],10,retbins=True,precision=6,duplicates='drop')
auc_BM = sklearn.metrics.roc_auc_score(df_bins_auc.overdue, df_bins_auc.loc[:, modelVar])
print('AUC_BM: ', auc_BM)
Nothing, interval = pd.qcut(df_bins_auc.loc[:, modelVar], 10, retbins=True, precision=6,
duplicates='drop')
interval[0] = 0
del Nothing
df_passdueday = query_sql(sql_passdueday.replace('@modelVar',modelVar).replace('@appliedType',appliedType).replace('@channelID',channelID).replace('@passdue_day',str(passdueday)))
count,df_overdue,y,dateList,cols = liftchart(df_passdueday,modelVar,np.array(interval).round(6))
df_passdueday = query_sql(
sql_passdueday.replace('@modelVar', modelVar).replace('@appliedType', appliedType).replace(
'@channelID', channelID).replace('@passdue_day', str(passdueday)))
count, df_overdue, y, dateList, cols = liftchart(df_passdueday, modelVar, np.array(interval).round(6))
value_tab = []
rows = []
......@@ -378,18 +396,26 @@ for modelVar in modelList:
auc = []
for mon in dateList:
y_list.append(y.loc[mon].values)
#value_tab.append(y.loc[mon].astype('str')+'%')
value_tab.append(df_overdue.loc[mon].astype('str') + ' (总计 ' + count.loc[mon].astype('str') + ')' )
#rows.append(str(mon)+' OverdueRate');
rows.append(str(mon)+' Count')
# value_tab.append(y.loc[mon].astype('str')+'%')
value_tab.append(df_overdue.loc[mon].astype('str') + ' (总计 ' + count.loc[mon].astype('str') + ')')
# rows.append(str(mon)+' OverdueRate');
rows.append(str(mon) + ' Count')
df_passdueday = df_passdueday.dropna(axis=0)
aucri.append(round((sklearn.metrics.roc_auc_score(df_passdueday[df_passdueday.applied_at==mon].overdue, df_passdueday[df_passdueday.applied_at==mon].loc[:,modelVar])/auc_BM),3))
auc.append(round(sklearn.metrics.roc_auc_score(df_passdueday[df_passdueday.applied_at==mon].overdue, df_passdueday[df_passdueday.applied_at==mon].loc[:,modelVar]),3))
auc[-1] = str(auc[-1]) + '\n AUC基准: ' + str(round(auc_BM,3))
plotLiftChart(modelType[modelList.index(modelVar)] + '-' + appliedType_type[appliedType] + '-' + channel[channelID] + ' AUC WITH '+ str(passdueday) + '+',y_list,dateList,aucri,auc,rows,cols,value_tab,path)
aucri.append(round((sklearn.metrics.roc_auc_score(
df_passdueday[df_passdueday.applied_at == mon].overdue,
df_passdueday[df_passdueday.applied_at == mon].loc[:, modelVar]) / auc_BM), 3))
auc.append(round(
sklearn.metrics.roc_auc_score(df_passdueday[df_passdueday.applied_at == mon].overdue,
df_passdueday[df_passdueday.applied_at == mon].loc[:, modelVar]),
3))
auc[-1] = str(auc[-1]) + '\n AUC基准: ' + str(round(auc_BM, 3))
plotLiftChart(
modelType[modelList.index(modelVar)] + '-' + appliedType_type[appliedType] + '-' + channel[
channelID] + ' AUC WITH ' + str(passdueday) + '+', y_list, dateList, aucri, auc, rows, cols,
value_tab, path)
except Exception as e: # ZeroDivisionError
print('val exception',e)
print('val exception', e)
def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
......@@ -409,14 +435,14 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
fig, axs = plt.subplots(1, 1, figsize=(13, 9), linewidth=0.1)
table_rows = dataset.columns
table_cols = pd.Series(dataset.index).astype(str).map(lambda x : x.replace(' ','')).map(lambda x : x.replace('0.','.'))
table_cols = pd.Series(dataset.index).astype(str).map(lambda x: x.replace(' ', '')).map(
lambda x: x.replace('0.', '.'))
# traverse each columns of dataframe
for i in range(len(table_rows)):
x = range(len(table_cols))
y = dataset.iloc[:,i]
axs.plot(x, y, label = str(table_rows[i]) + ' AUC: ' + str(auc[i]))
y = dataset.iloc[:, i]
axs.plot(x, y, label=str(table_rows[i]) + ' AUC: ' + str(auc[i]))
# if table should be plot
if plot_tab:
......@@ -427,10 +453,10 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
table_rows = tab_df.columns
table_cols = tab_df.index
tab_df = [list(tab_df.iloc[:, 1].values) for i in range(len(table_rows))]
the_table = plt.table(cellText = tab_df,
rowLabels = table_rows,
colLabels = table_cols,
colWidths = [0.91 / (len(table_cols) - 1)] * len(table_cols),
the_table = plt.table(cellText=tab_df,
rowLabels=table_rows,
colLabels=table_cols,
colWidths=[0.91 / (len(table_cols) - 1)] * len(table_cols),
loc='bottom')
plt.xticks([])
# otherwise, nothing to do here
......@@ -449,8 +475,3 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
plt.savefig(saved_path + title + ".png")
plt.show()
return 1
No preview for this file type
class Solution:
    """Helper for finding the longest contiguous subarray with a given sum."""

    def __init__(self):
        pass

    def find_max_length(self, array, k):
        """Return the length of the longest contiguous subarray summing to ``k``.

        Args:
            array: Sequence of numbers (may contain negatives and zeros).
            k: Target sum.

        Returns:
            The length of the longest contiguous run of ``array`` whose
            elements add up to ``k``; ``0`` when ``array`` is empty or no
            such run exists.

        The previous version only extended a window anchored at index 0,
        never checked the final window, and fell off the end without
        returning ``max_length``. This version uses prefix sums so every
        start position is considered in a single pass.
        """
        if not array:
            return 0

        # Maps a prefix sum to the shortest prefix length that produces it.
        # Keeping the earliest occurrence maximizes the subarray length later.
        first_seen = {0: 0}
        running_sum = 0
        max_length = 0

        for end, value in enumerate(array, start=1):
            running_sum += value
            # A subarray (start, end] sums to k exactly when
            # prefix[end] - prefix[start] == k.
            start = first_seen.get(running_sum - k)
            if start is not None:
                max_length = max(max_length, end - start)
            first_seen.setdefault(running_sum, end)

        return max_length
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment