Commit a3823e67 authored by linfang.wang

update the previous model

parent fe8f7148
@@ -9,7 +9,7 @@ def mysql_query(sql,engine_sql):
     '''
     res=[]
     #== palo caps a single query at 10000 rows
-    tmp=pd.read_sql(sql,engine_sql,chunksize=5000)
+    tmp=pd.read_sql(sql,engine_sql,chunksize=5001)
     for tt in tmp:
         res.append(tt)
     return pd.concat(res)
\ No newline at end of file
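Note on this hunk: since palo limits a single query result, `pd.read_sql` is driven with a `chunksize` below that cap and the pieces are concatenated afterwards. A minimal, self-contained sketch of the same pattern; the sqlite engine and `loans` table here are stand-ins, not the project's actual `engine_sql`:

    import pandas as pd
    import sqlalchemy

    # stand-in engine; the real code receives engine_sql built elsewhere
    engine = sqlalchemy.create_engine("sqlite:///example.db")

    # with chunksize, read_sql returns an iterator of DataFrames,
    # each at most 5001 rows, instead of one large frame
    chunks = pd.read_sql("SELECT * FROM loans", engine, chunksize=5001)
    df = pd.concat(chunks, ignore_index=True)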
@@ -37,7 +37,10 @@ def query_sample(start_date,end_date,is_loan=True):
     ''' % (','.join(features),start_date,end_date)
     df=dbquery.mysql_query(sql,engine_risk_analysis)
     yewu=query_byloanid(df.loan_id.tolist())
+    df.loan_id=df.loan_id.astype(int)
+    yewu.loan_id=yewu.loan_id.astype(int)
     df=pd.merge(df,yewu,on='loan_id',how='inner')
+    df.applied_at=pd.to_datetime(df.applied_at)
     value_map = {
         "近3天": 1,
         "近4-5天": 2,
...
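Note on this hunk: the two new `astype(int)` casts guard against a key-dtype mismatch, where `loan_id` arrives as a string from one source and as an integer from the other; recent pandas raises on merging an object column against an int64 one, and older versions silently returned an empty inner merge. (The truncated `value_map` maps Chinese recency buckets to codes: "近3天" is "last 3 days", "近4-5天" is "last 4-5 days".) A minimal sketch of the failure mode and the fix, with made-up data:

    import pandas as pd

    df = pd.DataFrame({"loan_id": ["101", "102"], "score": [0.3, 0.7]})   # keys as strings
    yewu = pd.DataFrame({"loan_id": [101, 102], "passdue_day": [0, 20]})  # keys as ints

    # pd.merge(df, yewu, on="loan_id")  # raises on recent pandas: object vs int64 keys

    df.loan_id = df.loan_id.astype(int)
    yewu.loan_id = yewu.loan_id.astype(int)
    merged = pd.merge(df, yewu, on="loan_id", how="inner")  # two rows, as expected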
@@ -3,7 +3,8 @@ import numpy as np
 import datetime
 from mvp import xgbreport
 from data.analyis import datacal
-from mvp import dhb
+# from mvp import dhb
+from data.samples import dhb,sample

 if __name__ == '__main__':
     # features=[
@@ -47,16 +48,36 @@ if __name__ == '__main__':
     #     'third_data_source#xy_pan_newqueryAorgAcount',
     #     'third_data_source#xy_pan_newqueryAsumAcount'
     # ]
-    dhb = dhb.dhb(start_time_period='2019-01-19 11:00:00',end_time_period='2019-01-20 12:00:00')
-    df=dhb.dhb_features_extract()
-    print(df.columns.tolist())
-    print(df.target.unique())
-    label='target'
-    features=dhb.get_feature()
-    df[features]=df[features].astype(float)
-    df['target']=df['target'].astype(int)
-    print('----feature---',len(features))
+    # dhb = dhb.dhb(start_time_period='2019-01-19 11:00:00',end_time_period='2019-01-20 12:00:00')
+    # df=dhb.dhb_features_extract()
+    # print(df.columns.tolist())
+    # print(df.target.unique())
+    # label='target'
+    # features=dhb.get_feature()
+    # df[features]=df[features].astype(float)
+    # df['target']=df['target'].astype(int)
+    # print('----feature---',len(features))
     # df=pd.read_csv('test.csv')
-    dftrain,dftest=datacal.split_train_val(df,trainsplit='timeSeries',trainsplitRatio=0.8,sort_col='applied_at')
-    xgbreport.report(dftrain,dftest,features,label,'','tmp.doc',kfold=2)
+    #== model name
+    model_name='dhb'
+    #== target is 15 days overdue
+    passdue_day=15
+    df_log=sample.get_last_record(model_name)
+    if df_log.shape[0]==1:
+        start_date,end_date=sample.cal_sample_date(df_log.max_date[0],passdue_day)
+    else:
+        start_date, end_date = sample.cal_sample_date(passdue_day=passdue_day)
+    start_date='2019-01-01'
+    end_date='2019-01-10'
+    print(start_date,end_date)
+    df_sample=dhb.query_sample(start_date,end_date)
+    df_sample['applied_at'] = pd.to_datetime(df_sample['applied_at'])
+    df_sample['label']=1
+    df_sample.loc[df_sample.passdue_day >= passdue_day,'label']=0
+    dftrain,dftest=datacal.split_train_val(df_sample,trainsplit='timeSeries',trainsplitRatio=0.8,sort_col='applied_at')
+    # record sample info
+    # sample.save_model_record(model_name,min_date=df_sample.applied_at.min(),max_date=df_sample.applied_at.max(),sample_cnt=df_sample.shape[0],
+    #                          train_min_date=dftrain.applied_at.min(),train_max_date=dftrain.applied_at.max(),train_cnt=dftrain.shape[0],
+    #                          test_min_date=dftest.applied_at.min(),test_max_date=dftest.applied_at.max(),test_cnt=dftest.shape[0])
+    #== xgboost gbtree
+    xgbreport.report(dftrain,dftest,dhb.get_feature(),'label','','xgboost_%s.doc' % datetime.datetime.now().date().strftime('%y%m%d'),kfold=2)
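Note on this hunk: the new flow labels each sample from its overdue days (label starts at 1 and is set to 0 once `passdue_day` reaches 15) and then splits by application time; also note the computed date window is immediately overridden with the hard-coded '2019-01-01' to '2019-01-10' range, presumably for debugging. `datacal.split_train_val` is project-internal; below is a plausible sketch of a `timeSeries` split at ratio 0.8 together with the labeling step, on made-up data, as an assumption about its behavior rather than the actual implementation:

    import pandas as pd

    def split_train_val_sketch(df, ratio=0.8, sort_col="applied_at"):
        # sort by time so the validation window is strictly later than training
        df = df.sort_values(sort_col).reset_index(drop=True)
        cut = int(len(df) * ratio)
        return df.iloc[:cut], df.iloc[cut:]

    df_sample = pd.DataFrame({
        "applied_at": pd.to_datetime(["2019-01-02", "2019-01-05",
                                      "2019-01-09", "2019-01-10"]),
        "passdue_day": [0, 3, 20, 0],
    })
    passdue_day = 15
    df_sample["label"] = 1                                            # default: good
    df_sample.loc[df_sample.passdue_day >= passdue_day, "label"] = 0  # >=15 days overdue
    dftrain, dftest = split_train_val_sketch(df_sample)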