Commit 45721de0 authored by 王家华's avatar 王家华

debug

parent b5a3f366
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
import numpy
import pandas
......@@ -21,8 +22,9 @@ params = {
'verbose': 1 # <0 显示致命的, =0 显示错误 (警告), >0 显示信息
}
'''
'''
instructions : training lightgbm model with specified params
Parameters :
......@@ -33,5 +35,53 @@ Parameters :
'''
def lgb_train(params, training_set, features, target):
    """Build the LightGBM dataset for a training run (training is not wired up yet).

    Parameters :
        params - LightGBM parameter dict; currently unused because the
                 ``lgb.train`` call has not been implemented
        training_set - table-like object indexed by column name
        features - column selector for the feature columns
        target - column selector for the label column
    Returns :
        always the constant 1
    """
    feature_columns = training_set[features]
    label_column = training_set[target]
    # The dataset is constructed but not consumed: the follow-up
    # lgb.train(params, ...) step is still a TODO.
    lgb.Dataset(feature_columns, label_column)
    return 1
'''
instructions : build a lgb classifier
Params :
'''
def buildClf(params):
    """Build an unfitted LightGBM classifier from a parameter mapping.

    Params :
        params - dict of keyword arguments for ``lgb.LGBMClassifier``;
                 ``None`` or an empty dict yields the library defaults
    Returns :
        a new, unfitted ``lgb.LGBMClassifier``
    """
    # Unpack the mapping into keyword arguments. Passing the dict positionally
    # binds the whole dict to the constructor's first parameter
    # (``boosting_type``) instead of applying the individual settings.
    return lgb.LGBMClassifier(**(params or {}))
'''
'''
def automodelfit(clf, param_grid, dftrain, features, resp, kfold=10, scoring='roc_auc'):
    """Grid-search ``param_grid`` for ``clf`` with cross-validation.

    Params :
        clf - estimator to tune
        param_grid - parameter grid handed to ``GridSearchCV``
        dftrain - training data, indexed by column name
        features - feature column names
        resp - label column name
        kfold - value passed as ``cv`` (number of folds or a CV splitter)
        scoring - scoring name passed to ``GridSearchCV``
    Returns :
        the fitted ``GridSearchCV`` object; because ``refit=True`` the best
        parameters and the refitted estimator are available on it
    """
    import inspect

    search_kwargs = dict(scoring=scoring, n_jobs=2, cv=kfold, verbose=2, refit=True)
    # ``iid`` was deprecated in scikit-learn 0.22 and removed in 0.24, where
    # passing it raises TypeError. Keep the original ``iid=True`` only on
    # releases whose GridSearchCV still accepts it.
    if 'iid' in inspect.signature(GridSearchCV.__init__).parameters:
        search_kwargs['iid'] = True
    grid_search = GridSearchCV(clf, param_grid, **search_kwargs)
    # Train every candidate in the grid.
    grid_search.fit(dftrain[features], dftrain[resp])
    # The caller reads the best parameters from the returned search object.
    return grid_search
def modelfit(clf, dftrain, features, resp, useTrainCV=True, kfold=10, eval_metric='auc', early_stopping_rounds=20):
    """Fit ``clf`` on ``dftrain``, optionally tuning ``n_estimators`` by CV first.

    :param clf: scikit-learn style LightGBM estimator (e.g. ``lgb.LGBMClassifier``)
    :param dftrain: training DataFrame
    :param features: feature column names
    :param resp: label column name
    :param useTrainCV: if True, run ``lgb.cv`` first and set ``n_estimators``
        to the number of boosting rounds the cross-validation kept
    :param kfold: number of cross-validation folds
    :param eval_metric: metric used both for the CV run and for the final fit;
        it should match the model objective
    :param early_stopping_rounds: stop the CV run once the metric has not
        improved for this many consecutive rounds
    :return: the fitted ``clf`` (the same object that was passed in)
    """
    if useTrainCV:
        # The estimator's own parameters drive the CV run. Entries left as
        # None are dropped so the native API falls back to its defaults.
        cv_params = {key: value for key, value in clf.get_params().items() if value is not None}
        num_boost_round = cv_params.pop('n_estimators')
        if 'objective' not in cv_params:
            # The sklearn wrapper only picks its objective at fit time; the
            # native API would otherwise default to a regression objective.
            # NOTE(review): multiclass assumes labels are encoded 0..n-1 - confirm.
            n_classes = dftrain[resp].nunique()
            if n_classes > 2:
                cv_params['objective'] = 'multiclass'
                cv_params['num_class'] = n_classes
            else:
                cv_params['objective'] = 'binary'
        train_set = lgb.Dataset(dftrain[features].values, label=dftrain[resp].values)
        # NOTE(review): early_stopping_rounds / verbose_eval are the keyword
        # form used by the LightGBM releases contemporary with this file;
        # newer releases expect callbacks instead - confirm against the
        # installed version.
        cvresult = lgb.cv(cv_params, train_set, num_boost_round=num_boost_round, nfold=kfold,
                          metrics=eval_metric, early_stopping_rounds=early_stopping_rounds,
                          verbose_eval=True)
        # lgb.cv returns a dict of per-round score lists (not a DataFrame);
        # the list length is the number of rounds kept after early stopping.
        best_rounds = max((len(scores) for scores in cvresult.values()), default=num_boost_round)
        clf.set_params(n_estimators=best_rounds)
    clf.fit(dftrain[features], dftrain[resp], eval_metric=eval_metric)
    return clf
import pandas as pd
from data.datasource import mysqldb,mongodb
import time
from dateutil.relativedelta import relativedelta
import datetime
import dateutil
'''
model instructions : establishes a dhb object which contains the attributes of the dhb model
......@@ -20,222 +22,223 @@ API :
class dhb:
# features as Series format
features = ['dhb_last_30_and_60_days_dun_call_avg_duration',
'dhb_last_30_and_60_days_dun_call_duration_above60',
'dhb_last_30_and_60_days_dun_call_duration_below15',
'dhb_last_30_and_60_days_dun_call_duration_between15_and_30',
'dhb_last_30_and_60_days_dun_call_in_duration',
'dhb_last_30_and_60_days_dun_call_in_times',
'dhb_last_30_and_60_days_dun_call_out_duration',
'dhb_last_30_and_60_days_dun_call_out_times',
'dhb_last_30_and_60_days_dun_call_tel_total_nums',
'dhb_last_30_and_60_days_dun_call_total_duration',
'dhb_last_30_and_60_days_dun_call_total_times',
'dhb_last_30_and_60_days_ntdun_call_avg_duration',
'dhb_last_30_and_60_days_ntdun_call_duration_above60',
'dhb_last_30_and_60_days_ntdun_call_duration_below15',
'dhb_last_30_and_60_days_ntdun_call_duration_between15_and_30',
'dhb_last_30_and_60_days_ntdun_call_in_duration',
'dhb_last_30_and_60_days_ntdun_call_in_times',
'dhb_last_30_and_60_days_ntdun_call_out_duration',
'dhb_last_30_and_60_days_ntdun_call_out_times',
'dhb_last_30_and_60_days_ntdun_call_tel_total_nums',
'dhb_last_30_and_60_days_ntdun_call_total_duration',
'dhb_last_30_and_60_days_ntdun_call_total_times',
'dhb_last_30_days_dun_call_avg_duration',
'dhb_last_30_days_dun_call_duration_above60',
'dhb_last_30_days_dun_call_duration_below15',
'dhb_last_30_days_dun_call_duration_between15_and_30',
'dhb_last_30_days_dun_call_in_duration',
'dhb_last_30_days_dun_call_in_times',
'dhb_last_30_days_dun_call_out_duration',
'dhb_last_30_days_dun_call_out_times',
'dhb_last_30_days_dun_call_tel_total_nums',
'dhb_last_30_days_dun_call_total_duration',
'dhb_last_30_days_dun_call_total_times',
'dhb_last_30_days_ntdun_call_avg_duration',
'dhb_last_30_days_ntdun_call_duration_above60',
'dhb_last_30_days_ntdun_call_duration_below15',
'dhb_last_30_days_ntdun_call_duration_between15_and_30',
'dhb_last_30_days_ntdun_call_in_duration',
'dhb_last_30_days_ntdun_call_in_times',
'dhb_last_30_days_ntdun_call_out_duration',
'dhb_last_30_days_ntdun_call_out_times',
'dhb_last_30_days_ntdun_call_tel_total_nums',
'dhb_last_30_days_ntdun_call_total_duration',
'dhb_last_30_days_ntdun_call_total_times',
'dhb_last_60_and_90_days_dun_call_avg_duration',
'dhb_last_60_and_90_days_dun_call_duration_above60',
'dhb_last_60_and_90_days_dun_call_duration_below15',
'dhb_last_60_and_90_days_dun_call_duration_between15_and_30',
'dhb_last_60_and_90_days_dun_call_in_duration',
'dhb_last_60_and_90_days_dun_call_in_times',
'dhb_last_60_and_90_days_dun_call_out_duration',
'dhb_last_60_and_90_days_dun_call_out_times',
'dhb_last_60_and_90_days_dun_call_tel_total_nums',
'dhb_last_60_and_90_days_dun_call_total_duration',
'dhb_last_60_and_90_days_dun_call_total_times',
'dhb_last_60_and_90_days_ntdun_call_avg_duration',
'dhb_last_60_and_90_days_ntdun_call_duration_above60',
'dhb_last_60_and_90_days_ntdun_call_duration_below15',
'dhb_last_60_and_90_days_ntdun_call_duration_between15_and_30',
'dhb_last_60_and_90_days_ntdun_call_in_duration',
'dhb_last_60_and_90_days_ntdun_call_in_times',
'dhb_last_60_and_90_days_ntdun_call_out_duration',
'dhb_last_60_and_90_days_ntdun_call_out_times',
'dhb_last_60_and_90_days_ntdun_call_tel_total_nums',
'dhb_last_60_and_90_days_ntdun_call_total_duration',
'dhb_last_60_and_90_days_ntdun_call_total_times',
'dhb_last_three_weeks_dun_call_avg_duration',
'dhb_last_three_weeks_dun_call_duration_above60',
'dhb_last_three_weeks_dun_call_duration_below15',
'dhb_last_three_weeks_dun_call_duration_between15_and_30',
'dhb_last_three_weeks_dun_call_in_duration',
'dhb_last_three_weeks_dun_call_in_times',
'dhb_last_three_weeks_dun_call_out_duration',
'dhb_last_three_weeks_dun_call_out_times',
'dhb_last_three_weeks_dun_call_tel_total_nums',
'dhb_last_three_weeks_dun_call_total_duration',
'dhb_last_three_weeks_dun_call_total_times',
'dhb_last_three_weeks_ntdun_call_avg_duration',
'dhb_last_three_weeks_ntdun_call_duration_above60',
'dhb_last_three_weeks_ntdun_call_duration_below15',
'dhb_last_three_weeks_ntdun_call_duration_between15_and_30',
'dhb_last_three_weeks_ntdun_call_in_duration',
'dhb_last_three_weeks_ntdun_call_in_times',
'dhb_last_three_weeks_ntdun_call_out_duration',
'dhb_last_three_weeks_ntdun_call_out_times',
'dhb_last_three_weeks_ntdun_call_tel_total_nums',
'dhb_last_three_weeks_ntdun_call_total_duration',
'dhb_last_three_weeks_ntdun_call_total_times',
'dhb_last_two_weeks_dun_call_avg_duration',
'dhb_last_two_weeks_dun_call_duration_above60',
'dhb_last_two_weeks_dun_call_duration_below15',
'dhb_last_two_weeks_dun_call_duration_between15_and_30',
'dhb_last_two_weeks_dun_call_in_duration',
'dhb_last_two_weeks_dun_call_in_times',
'dhb_last_two_weeks_dun_call_out_duration',
'dhb_last_two_weeks_dun_call_out_times',
'dhb_last_two_weeks_dun_call_tel_total_nums',
'dhb_last_two_weeks_dun_call_total_duration',
'dhb_last_two_weeks_dun_call_total_times',
'dhb_last_two_weeks_ntdun_call_avg_duration',
'dhb_last_two_weeks_ntdun_call_duration_above60',
'dhb_last_two_weeks_ntdun_call_duration_below15',
'dhb_last_two_weeks_ntdun_call_duration_between15_and_30',
'dhb_last_two_weeks_ntdun_call_in_duration',
'dhb_last_two_weeks_ntdun_call_in_times',
'dhb_last_two_weeks_ntdun_call_out_duration',
'dhb_last_two_weeks_ntdun_call_out_times',
'dhb_last_two_weeks_ntdun_call_tel_total_nums',
'dhb_last_two_weeks_ntdun_call_total_duration',
'dhb_last_two_weeks_ntdun_call_total_times',
'dhb_last_week_dun_call_avg_duration',
'dhb_last_week_dun_call_duration_above60',
'dhb_last_week_dun_call_duration_below15',
'dhb_last_week_dun_call_duration_between15_and_30',
'dhb_last_week_dun_call_in_duration',
'dhb_last_week_dun_call_in_times',
'dhb_last_week_dun_call_out_duration',
'dhb_last_week_dun_call_out_times',
'dhb_last_week_dun_call_tel_total_nums',
'dhb_last_week_dun_call_total_duration',
'dhb_last_week_dun_call_total_times',
'dhb_last_week_ntdun_call_avg_duration',
'dhb_last_week_ntdun_call_duration_above60',
'dhb_last_week_ntdun_call_duration_below15',
'dhb_last_week_ntdun_call_duration_between15_and_30',
'dhb_last_week_ntdun_call_in_duration',
'dhb_last_week_ntdun_call_in_times',
'dhb_last_week_ntdun_call_out_duration',
'dhb_last_week_ntdun_call_out_times',
'dhb_last_week_ntdun_call_tel_total_nums',
'dhb_last_week_ntdun_call_total_duration',
'dhb_last_week_ntdun_call_total_times',
'dhb_overview_dun_call_avg_duration',
'dhb_overview_dun_call_duration_above60',
'dhb_overview_dun_call_duration_below15',
'dhb_overview_dun_call_duration_between15_and_30',
'dhb_overview_dun_call_in_duration',
'dhb_overview_dun_call_in_times',
'dhb_overview_dun_call_out_duration',
'dhb_overview_dun_call_out_times',
'dhb_overview_dun_call_tel_total_nums',
'dhb_overview_dun_call_total_duration',
'dhb_overview_dun_call_total_times',
'dhb_overview_dun_first_call_time',
'dhb_overview_dun_last_call_time',
'dhb_overview_ntdun_call_avg_duration',
'dhb_overview_ntdun_call_duration_above60',
'dhb_overview_ntdun_call_duration_below15',
'dhb_overview_ntdun_call_duration_between15_and_30',
'dhb_overview_ntdun_call_in_duration',
'dhb_overview_ntdun_call_in_times',
'dhb_overview_ntdun_call_out_duration',
'dhb_overview_ntdun_call_out_times',
'dhb_overview_ntdun_call_tel_total_nums',
'dhb_overview_ntdun_call_total_duration',
'dhb_overview_ntdun_call_total_times',
'dhb_overview_ntdun_first_call_time']
#features = pd.read_excel()
sql = '''
select dhb_last_30_and_60_days_dun_call_avg_duration,
dhb_last_30_and_60_days_dun_call_duration_above60,
dhb_last_30_and_60_days_dun_call_duration_below15,
dhb_last_30_and_60_days_dun_call_duration_between15_and_30,
dhb_last_30_and_60_days_dun_call_in_duration,
dhb_last_30_and_60_days_dun_call_in_times,
dhb_last_30_and_60_days_dun_call_out_duration,
dhb_last_30_and_60_days_dun_call_out_times,
dhb_last_30_and_60_days_dun_call_tel_total_nums,
dhb_last_30_and_60_days_dun_call_total_duration,
dhb_last_30_and_60_days_dun_call_total_times,
dhb_last_30_and_60_days_ntdun_call_avg_duration,
dhb_last_30_and_60_days_ntdun_call_duration_above60,
dhb_last_30_and_60_days_ntdun_call_duration_below15,
dhb_last_30_and_60_days_ntdun_call_duration_between15_and_30,
dhb_last_30_and_60_days_ntdun_call_in_duration,
dhb_last_30_and_60_days_ntdun_call_in_times,
dhb_last_30_and_60_days_ntdun_call_out_duration,
dhb_last_30_and_60_days_ntdun_call_out_times,
dhb_last_30_and_60_days_ntdun_call_tel_total_nums,
dhb_last_30_and_60_days_ntdun_call_total_duration,
dhb_last_30_and_60_days_ntdun_call_total_times,
dhb_last_30_days_dun_call_avg_duration,
dhb_last_30_days_dun_call_duration_above60,
dhb_last_30_days_dun_call_duration_below15,
dhb_last_30_days_dun_call_duration_between15_and_30,
dhb_last_30_days_dun_call_in_duration,
dhb_last_30_days_dun_call_in_times,
dhb_last_30_days_dun_call_out_duration,
dhb_last_30_days_dun_call_out_times,
dhb_last_30_days_dun_call_tel_total_nums,
dhb_last_30_days_dun_call_total_duration,
dhb_last_30_days_dun_call_total_times,
dhb_last_30_days_ntdun_call_avg_duration,
dhb_last_30_days_ntdun_call_duration_above60,
dhb_last_30_days_ntdun_call_duration_below15,
dhb_last_30_days_ntdun_call_duration_between15_and_30,
dhb_last_30_days_ntdun_call_in_duration,
dhb_last_30_days_ntdun_call_in_times,
dhb_last_30_days_ntdun_call_out_duration,
dhb_last_30_days_ntdun_call_out_times,
dhb_last_30_days_ntdun_call_tel_total_nums,
dhb_last_30_days_ntdun_call_total_duration,
dhb_last_30_days_ntdun_call_total_times,
dhb_last_60_and_90_days_dun_call_avg_duration,
dhb_last_60_and_90_days_dun_call_duration_above60,
dhb_last_60_and_90_days_dun_call_duration_below15,
dhb_last_60_and_90_days_dun_call_duration_between15_and_30,
dhb_last_60_and_90_days_dun_call_in_duration,
dhb_last_60_and_90_days_dun_call_in_times,
dhb_last_60_and_90_days_dun_call_out_duration,
dhb_last_60_and_90_days_dun_call_out_times,
dhb_last_60_and_90_days_dun_call_tel_total_nums,
dhb_last_60_and_90_days_dun_call_total_duration,
dhb_last_60_and_90_days_dun_call_total_times,
dhb_last_60_and_90_days_ntdun_call_avg_duration,
dhb_last_60_and_90_days_ntdun_call_duration_above60,
dhb_last_60_and_90_days_ntdun_call_duration_below15,
dhb_last_60_and_90_days_ntdun_call_duration_between15_and_30,
dhb_last_60_and_90_days_ntdun_call_in_duration,
dhb_last_60_and_90_days_ntdun_call_in_times,
dhb_last_60_and_90_days_ntdun_call_out_duration,
dhb_last_60_and_90_days_ntdun_call_out_times,
dhb_last_60_and_90_days_ntdun_call_tel_total_nums,
dhb_last_60_and_90_days_ntdun_call_total_duration,
dhb_last_60_and_90_days_ntdun_call_total_times,
dhb_last_three_weeks_dun_call_avg_duration,
dhb_last_three_weeks_dun_call_duration_above60,
dhb_last_three_weeks_dun_call_duration_below15,
dhb_last_three_weeks_dun_call_duration_between15_and_30,
dhb_last_three_weeks_dun_call_in_duration,
dhb_last_three_weeks_dun_call_in_times,
dhb_last_three_weeks_dun_call_out_duration,
dhb_last_three_weeks_dun_call_out_times,
dhb_last_three_weeks_dun_call_tel_total_nums,
dhb_last_three_weeks_dun_call_total_duration,
dhb_last_three_weeks_dun_call_total_times,
dhb_last_three_weeks_ntdun_call_avg_duration,
dhb_last_three_weeks_ntdun_call_duration_above60,
dhb_last_three_weeks_ntdun_call_duration_below15,
dhb_last_three_weeks_ntdun_call_duration_between15_and_30,
dhb_last_three_weeks_ntdun_call_in_duration,
dhb_last_three_weeks_ntdun_call_in_times,
dhb_last_three_weeks_ntdun_call_out_duration,
dhb_last_three_weeks_ntdun_call_out_times,
dhb_last_three_weeks_ntdun_call_tel_total_nums,
dhb_last_three_weeks_ntdun_call_total_duration,
dhb_last_three_weeks_ntdun_call_total_times,
dhb_last_two_weeks_dun_call_avg_duration,
dhb_last_two_weeks_dun_call_duration_above60,
dhb_last_two_weeks_dun_call_duration_below15,
dhb_last_two_weeks_dun_call_duration_between15_and_30,
dhb_last_two_weeks_dun_call_in_duration,
dhb_last_two_weeks_dun_call_in_times,
dhb_last_two_weeks_dun_call_out_duration,
dhb_last_two_weeks_dun_call_out_times,
dhb_last_two_weeks_dun_call_tel_total_nums,
dhb_last_two_weeks_dun_call_total_duration,
dhb_last_two_weeks_dun_call_total_times,
dhb_last_two_weeks_ntdun_call_avg_duration,
dhb_last_two_weeks_ntdun_call_duration_above60,
dhb_last_two_weeks_ntdun_call_duration_below15,
dhb_last_two_weeks_ntdun_call_duration_between15_and_30,
dhb_last_two_weeks_ntdun_call_in_duration,
dhb_last_two_weeks_ntdun_call_in_times,
dhb_last_two_weeks_ntdun_call_out_duration,
dhb_last_two_weeks_ntdun_call_out_times,
dhb_last_two_weeks_ntdun_call_tel_total_nums,
dhb_last_two_weeks_ntdun_call_total_duration,
dhb_last_two_weeks_ntdun_call_total_times,
dhb_last_week_dun_call_avg_duration,
dhb_last_week_dun_call_duration_above60,
dhb_last_week_dun_call_duration_below15,
dhb_last_week_dun_call_duration_between15_and_30,
dhb_last_week_dun_call_in_duration, dhb_last_week_dun_call_in_times,
dhb_last_week_dun_call_out_duration,
dhb_last_week_dun_call_out_times,
dhb_last_week_dun_call_tel_total_nums,
dhb_last_week_dun_call_total_duration,
dhb_last_week_dun_call_total_times,
dhb_last_week_ntdun_call_avg_duration,
dhb_last_week_ntdun_call_duration_above60,
dhb_last_week_ntdun_call_duration_below15,
dhb_last_week_ntdun_call_duration_between15_and_30,
dhb_last_week_ntdun_call_in_duration,
dhb_last_week_ntdun_call_in_times,
dhb_last_week_ntdun_call_out_duration,
dhb_last_week_ntdun_call_out_times,
dhb_last_week_ntdun_call_tel_total_nums,
dhb_last_week_ntdun_call_total_duration,
dhb_last_week_ntdun_call_total_times,
dhb_overview_dun_call_avg_duration,
dhb_overview_dun_call_duration_above60,
dhb_overview_dun_call_duration_below15,
dhb_overview_dun_call_duration_between15_and_30,
dhb_overview_dun_call_in_duration, dhb_overview_dun_call_in_times,
dhb_overview_dun_call_out_duration, dhb_overview_dun_call_out_times,
dhb_overview_dun_call_tel_total_nums,
dhb_overview_dun_call_total_duration,
dhb_overview_dun_call_total_times, dhb_overview_dun_first_call_time,
dhb_overview_dun_last_call_time,
dhb_overview_ntdun_call_avg_duration,
dhb_overview_ntdun_call_duration_above60,
dhb_overview_ntdun_call_duration_below15,
dhb_overview_ntdun_call_duration_between15_and_30,
dhb_overview_ntdun_call_in_duration,
dhb_overview_ntdun_call_in_times,
dhb_overview_ntdun_call_out_duration,
dhb_overview_ntdun_call_out_times,
dhb_overview_ntdun_call_tel_total_nums,
dhb_overview_ntdun_call_total_duration,
dhb_overview_ntdun_call_total_times,
dhb_overview_ntdun_first_call_time,
dhb_overview_ntdun_last_call_time,applied_at,applied_from,applied_type,if(passdue_day>15,1,0) as target
from risk_analysis
where applied_at >= '@start_time_period' and applied_at < '@end_time_period'
and transacted = 1
and dhb_flag =1
and datediff(now(),deadline) > 15
'''
def __init__(self,overdue_days=15,features=None,sql=None,start_time_period=None,end_time_period=None):
# sql = '''
#
# '''
start_time_period = (datetime.date.today() - relativedelta(months=+7)).strftime("%Y-%m-%d 00:00:00")
end_time_period = (datetime.date.today() - relativedelta(days=+17)).strftime("%Y-%m-%d 00:00:00")
def __init__(self,features=None,sql=None,start_time_period=None,end_time_period=None):
try:
if features != None:
self.features = features
if sql != None:
self.sql = sql
else:
sql = "select "+str(features).strip('[').strip(']')+''',if(passdue_day>'''+str(overdue_days)+''',1,0) as target, applied_at, applied_from, applied_type
from risk_analysis
where applied_at >= '@start_time_period' and applied_at < '@end_time_period'
and transacted = 1
and dhb_flag =1
and datediff(now(),deadline) > '''+str(overdue_days)+'''
'''
if start_time_period != None:
self.start_time_period = start_time_period
# if the para was not Series
if(type(features) != pd.core.series.Series):
self.features =pd.Series(features)
else:
self.start_time_period =(datetime.date.today() - dateutil.relativedelta(months=+7)).strftime("%Y-%m-%d 00:00:00")
if end_time_period != None:
self.end_time_period = end_time_period
else:
self.end_time_period = (datetime.date.today() - dateutil.relativedelta(days=+16)).strftime("%Y-%m-%d 00:00:00")
self.features = features
except Exception as e:
print('Parameters Error:\n',e)
print("'features' parameter type Error, it should be list or Series")
raise
if sql != None:
self.sql = sql
if start_time_period != None:
self.start_time_period = start_time_period
if end_time_period != None:
self.end_time_period = end_time_period
def dhb_features_extract(self):
'''
instrucions : extract dhb features from risk_analysis
:param self:
:return: dhb features
'''
value_map = {
"近3天":1,
"近4-5天":2,
"近6-7天":3,
"近8-15天":4,
"近16-30天":5,
"近31-60天":6,
"近61-90天":7,
"近91-120天":8,
"近121-150天":9,
"近151-180天":10,
"180天前":11,
"无":0
"近3天":1,
"近4-5天":2,
"近6-7天":3,
"近8-15天":4,
"近16-30天":5,
"近31-60天":6,
"近61-90天":7,
"近91-120天":8,
"近121-150天":9,
"近151-180天":10,
"180天前":11,
"无":0
}
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
# use risk_analysis to extract data
sql=self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period)
#
dhb_loan = pd.read_sql(sql,mysqldb.engine_risk_analysis)
# dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
# manipul category datatype which includes sequences
dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = pd.get_dummies(dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]],columns=["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"])
# limit the upper boundary
dhb_loan = pd.read_sql(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period),mysqldb.engine_risk_analysis)
dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
dhb_loan.loc[dhb_loan.dhb_last_60_and_90_days_ntdun_call_avg_duration >= 42,"dhb_last_60_and_90_days_ntdun_call_avg_duration"] = 42
dhb_loan.loc[dhb_loan.dhb_overview_ntdun_call_duration_above60 >= 25,"dhb_overview_ntdun_call_duration_above60"] = 25
dhb_loan.loc[dhb_loan.dhb_last_30_and_60_days_ntdun_call_total_duration>= 800,"dhb_last_30_and_60_days_ntdun_call_total_duration"] = 800
......@@ -249,14 +252,41 @@ class dhb:
dhb_loan.loc[dhb_loan.dhb_overview_dun_call_tel_total_nums>= 22,"dhb_overview_dun_call_tel_total_nums"] = 22
dhb_loan.loc[dhb_loan.dhb_last_30_days_dun_call_total_duration>= 1100,"dhb_last_30_days_dun_call_total_duration"] = 1100
dhb_loan.loc[dhb_loan.dhb_last_two_weeks_ntdun_call_in_duration>= 300,"dhb_last_two_weeks_ntdun_call_in_duration"] = 300
# dhb_loan.to_csv("./dhb_loan_sample——"+str(datetime.date.today())+".csv")
print( datetime.time.strftime('%Y.%m.%d %H:%M:%S',datetime.time.localtime(datetime.time.time())) +"提取了dhb {}+ ".format(str(self.overdue_days)) + self.start_time_period + "to" + self.end_time_period + "时段样本")
dhb_loan.to_csv("./dhb_loan_sample——"+str(datetime.date.today())+".csv")
print( time.strftime('%Y.%m.%d %H:%M:%S',time.localtime(time.time())) +"提取了dhb "+ self.start_time_period + "to" + self.end_time_period + "时段样本")
return dhb_loan
def dhb_comparasion(self,limit="{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}",df=None,applied_type = None,applied_from = None):
df_mongo = mongodb.pymongodb(self.start_time_period, self.end_time_period, limit, "{'order_id':1,'model_exec_data_source#dhb':1}")
'''
instructions : build a comparasion
Params :
df - test dataset which was given
score - score column
target - label
start_time_period -
end_time_period -
applied_tpye -
applied_from -
Returns :
auc comparasion
liftchart plot
'''
# def dhb_comparasion(df,score = 'model_exec_data_source#dhb' ,target = 'target', start_time_period = self.start_time_period, end_time_period = self.end_time_period, applied_type = None, applied_from = None):
# df_mongo = mongodb.pymongodb(start_time_period, end_time_period, limit, "{'order_id':1,'model_exec_data_source#dhb':1}")
# df = pd.merge(df,df_mongo,how='left',left_on='order_no',right_on='order_id')
# df['bins'] = df.qcut(df['target'], q = 10, percision = 6, dupulicates='drop')
# df.groupby
# return 1
......@@ -6,7 +6,6 @@ from data.analyis import datacal
from models import xgboost
from matplotlib import pyplot as plt
from data.graph import drawplot
import dhb
from mvp import dhb
from data.datasource import mysqldb,mongodb
# Create the dhb model object with its default constructor arguments.
# NOTE: this rebinds the name `dhb` from the imported object to the instance
# it returns, so the original `dhb` is no longer reachable under that name.
dhb = dhb()
# Pull the dhb feature sample through the instance's dhb_features_extract().
df_dhb = dhb.dhb_features_extract()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment