Commit dda80e02 authored by linfang.wang's avatar linfang.wang

dhb 待调试

parent a132f117
......@@ -103,14 +103,6 @@ def density_chart(dataset,title):
plt.title(title)
plt.show()
def learning_curve():
def pdp_chart():
    # Stub: performs no plotting in the visible code; always returns the constant 1.
    # NOTE(review): presumably a placeholder for a chart helper still to be written — confirm.
    return 1
def uniVarChart():
    # Stub: performs no plotting in the visible code; always returns the constant 1.
    # NOTE(review): presumably a placeholder for a chart helper still to be written — confirm.
    return 1
......
import pandas as pd
import mysqldb
from data.datasource import mysqldb,mongodb
import datetime
import dateutil
'''
model instructions : establishes a dhb object which contains the attributes of the dhb model
......@@ -176,24 +177,11 @@ class dhb():
'dhb_overview_ntdun_call_total_duration',
'dhb_overview_ntdun_call_total_times',
'dhb_overview_ntdun_first_call_time']
'''
instructions :
build a constructor of dhb
Params:
'''
# time period set as default
start_time_period = (datetime.date.today() - relativedelta(months=+7)).strftime("%Y-%m-%d 00:00:00")
end_time_period = (datetime.date.today() - relativedelta(days=+16)).strftime("%Y-%m-%d 00:00:00")
def __init__(self,overdue_days=15,features=None,sql=None,start_time_period=None,end_time_period=None):
try:
if features != None:
self.features = features
self.features = features
if sql != None:
self.sql = sql
else:
......@@ -207,41 +195,46 @@ Params:
if start_time_period != None:
self.start_time_period = start_time_period
else:
self.start_time_period =(datetime.date.today() - dateutil.relativedelta(months=+7)).strftime("%Y-%m-%d 00:00:00")
if end_time_period != None:
self.end_time_period = end_time_period
else:
self.end_time_period = (datetime.date.today() - dateutil.relativedelta(days=+16)).strftime("%Y-%m-%d 00:00:00")
except Exception as e:
print('Parameters Error:\n',e)
'''
instructions : extract dhb features from risk_analysis
Params : nothing yet
returns : dhb features
'''
def dhb_features_extract(self):
'''
instructions : extract dhb features from risk_analysis
:param self:
:return: dhb features
'''
value_map = {
"近3天":1,
"近4-5天":2,
"近6-7天":3,
"近8-15天":4,
"近16-30天":5,
"近31-60天":6,
"近61-90天":7,
"近91-120天":8,
"近121-150天":9,
"近151-180天":10,
"180天前":11,
"无":0
}
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
"近3天":1,
"近4-5天":2,
"近6-7天":3,
"近8-15天":4,
"近16-30天":5,
"近31-60天":6,
"近61-90天":7,
"近91-120天":8,
"近121-150天":9,
"近151-180天":10,
"180天前":11,
"无":0
}
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
# use risk_analysis to extract data
dhb_loan = query_sql(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
# dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
sql=self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period)
#
dhb_loan = pd.read_sql(sql,mysqldb.engine_risk_analysis)
# dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
# manipulate the categorical columns whose values form an ordered sequence
dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = pd.get_dummies(df[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]],columns=["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"])
dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = pd.get_dummies(dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]],columns=["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"])
# limit the upper boundary
dhb_loan.loc[dhb_loan.dhb_last_60_and_90_days_ntdun_call_avg_duration >= 42,"dhb_last_60_and_90_days_ntdun_call_avg_duration"] = 42
dhb_loan.loc[dhb_loan.dhb_overview_ntdun_call_duration_above60 >= 25,"dhb_overview_ntdun_call_duration_above60"] = 25
......@@ -255,15 +248,15 @@ Params:
dhb_loan.loc[dhb_loan.dhb_last_60_and_90_days_ntdun_call_in_duration>= 1000,"dhb_last_60_and_90_days_ntdun_call_in_duration"] = 1000
dhb_loan.loc[dhb_loan.dhb_overview_dun_call_tel_total_nums>= 22,"dhb_overview_dun_call_tel_total_nums"] = 22
dhb_loan.loc[dhb_loan.dhb_last_30_days_dun_call_total_duration>= 1100,"dhb_last_30_days_dun_call_total_duration"] = 1100
dhb_loan.loc[dhb_loan.dhb_last_two_weeks_ntdun_call_in_duration>= 300,"dhb_last_two_weeks_ntdun_call_in_duration"] = 300
dhb_loan.loc[dhb_loan.dhb_last_two_weeks_ntdun_call_in_duration>= 300,"dhb_last_two_weeks_ntdun_call_in_duration"] = 300
dhb_loan.to_csv("./dhb_loan_sample——"+str(datetime.date.today())+".csv")
print( time.strftime('%Y.%m.%d %H:%M:%S',time.localtime(time.time())) +"提取了dhb {}+ ".format(str(overdue_days)) + self.start_time_period + "to" + self.end_time_period + "时段样本")
print( datetime.time.strftime('%Y.%m.%d %H:%M:%S',datetime.time.localtime(datetime.time.time())) +"提取了dhb {}+ ".format(str(self.overdue_days)) + self.start_time_period + "to" + self.end_time_period + "时段样本")
return dhb_loan
def dhb_comparasion(df=None,start_time_period = self.start_time_period, end_time_period = self.end_time_period, applied_type = None,applied_from = None):
df_mongo = pymongodb(start_time_period, end_time_period, limit, "{'order_id':1,'model_exec_data_source#dhb':1}")
def dhb_comparasion(self,limit="{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}",df=None,applied_type = None,applied_from = None):
df_mongo = mongodb.pymongodb(self.start_time_period, self.end_time_period, limit, "{'order_id':1,'model_exec_data_source#dhb':1}")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment