import pandas as pd
import numpy as np
import datetime
from tools import datacal

from mvp import refit
from mvp import rebuild


from models_obj import dhb_obj

###### global variable ######
# Name of the label column in the sample frame.
target = 'target'

#############################


# Build the dhb model object with its default arguments and extract the
# sample frame.
# NOTE(review): this runs at import time, not only when the file is executed
# as a script.
dhb = dhb_obj.dhb()
df_sample = dhb.dhb_features_extract()

# Cast the feature columns to float and the label column to int.
features = dhb.features
df_sample[features] = df_sample[features].astype(float)
df_sample[target] = df_sample[target].astype(int)
print('period of time: ',dhb.start_time_period,'-',dhb.end_time_period)
# len(features) counts feature columns, so the message says "features"
# (it previously said "samples").
print('----no.',len(features),'of features of dhb----')

# to save model performance

if __name__ == '__main__':
    # Script entry point: split the module-level sample, reserve the per-model
    # AUC holders, then query a date-bounded sample, label it, split it by
    # time and produce the xgboost report.

    # Previous edition, kept for reference: with at least 30000 samples use a
    # train/validation/test split, otherwise use train/test only and tune the
    # parameters with cross-validation.
    #
    # if len(df_sample) >= 30000:
    #     df_train,df_val,df_test = datacal.train_test_split_general(df_sample, val_size=0.25, test_size=0.25, stratify='target', random_state=7)
    # else:
    #     df_train,df_test = datacal.train_test_split_general(df_sample, val_size=None, test_size=0.25, stratify='target', random_state=7)

    # Default sampling method: always a three-way split.
    # NOTE(review): this call used to pass no arguments at all. The arguments
    # below are copied from the three-way call of the previous edition above;
    # confirm them against datacal.train_test_split_general.
    df_train, df_val, df_test = datacal.train_test_split_general(
        df_sample, val_size=0.25, test_size=0.25, stratify=target, random_state=7)

    # model refit

    # xgboost: AUC holder per dataset; every entry stays None until the
    # xgboost report below is wired in.
    xgb_model_auc = {'training_auc' : None, 'val_auc' : None, 'test_auc' : None}

    #xgbreport.report(df_train, df_test, df_val, features, target, '','dhb模型迭代报告.doc', kfold = 2)

    # TODO: add the xgb AUC of each dataset and the AUC per KA channel /
    # customer group.

    # lightgbm: AUC holder per dataset; every entry stays None for now.
    lgb_model_auc = {'training_auc' : None, 'val_auc' : None, 'test_auc' : None}

    #dftrain,dftest = datacal.split_train_val(df_sample,trainsplit = 'timeSeries',trainsplitRatio=0.8,sort_col='applied_at')
    #lgbreport.report(df_train, df_test, df_val, features, target,'','dhb模型迭代报告.doc', kfold = 2)

    # merge as single dataframe full of models
    #pd.DataFrame(xgb_model)

    # dhb = dhb.dhb(start_time_period='2019-01-19 11:00:00',end_time_period='2019-01-20 12:00:00')
    # df=dhb.dhb_features_extract()
    # print(df.columns.tolist())
    # print(df.target.unique())
    # label='target'
    # features=dhb.get_feature()
    # df[features]=df[features].astype(float)
    # df['target']=df['target'].astype(int)
    # print('----feature---',len(features))
    # df=pd.read_csv('test.csv')

    # Model name.
    model_name='dhb'
    # The target is 15 days past due.
    passdue_day=15

    # NOTE(review): `sample` is not imported in the visible part of this file,
    # so this line raises NameError unless it is defined elsewhere - confirm
    # the import.
    df_log=sample.get_last_record(model_name)
    if df_log.shape[0]==1:
        # Positional access: works whatever index label the single row has.
        start_date,end_date=sample.cal_sample_date(df_log.max_date.iloc[0],passdue_day)
    else:
        start_date, end_date = sample.cal_sample_date(passdue_day=passdue_day)
    # NOTE(review): these two assignments overwrite the dates computed just
    # above, so the log lookup currently has no effect - looks like a
    # debugging leftover; confirm before removing.
    start_date='2019-01-01'
    end_date='2019-01-10'
    print(start_date,end_date)

    df_sample=dhb.query_sample(start_date,end_date)
    df_sample['applied_at'] = pd.to_datetime(df_sample['applied_at'])
    # Label is 1 by default and 0 for rows at or beyond the past-due threshold.
    df_sample['label']=1
    df_sample.loc[df_sample.passdue_day >= passdue_day,'label']=0
    dftrain,dftest=datacal.split_train_val(df_sample,trainsplit='timeSeries',trainsplitRatio=0.8,sort_col='applied_at')

    # Record the sample information.
    # sample.save_model_record(model_name,min_date=df_sample.applied_at.min(),max_date=df_sample.applied_at.max(),sample_cnt=df_sample.shape[0],
    #                          train_min_date=dftrain.applied_at.min(),train_max_date=dftrain.applied_at.max(),train_cnt=dftrain.shape[0],
    #                          test_min_date=dftest.applied_at.min(),test_max_date=dftest.applied_at.max(),test_cnt=dftest.shape[0])

    # xgboost gbtree report; the output file name carries today's date (yymmdd).
    # NOTE(review): `xgbreport` is not imported in the visible part of this
    # file either - confirm the import.
    xgbreport.report(dftrain,dftest,dhb.get_feature(),'label','','xgboost_%s.doc' % datetime.datetime.now().date().strftime('%y%m%d'),kfold=2)








#################################################### report settings #############################################################################

# Lookup tables for the report: each key is a comma-separated id string and
# each value is the display name used for that group.
# NOTE(review): presumably application-source ids and application-type ids -
# confirm against the report code.
applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'}
applied_type = {'1,2':'首贷','1,2,3':'首付贷','1':'首申','2':'复申','3':'复贷'}

# refit / rebuild sequence

# Build the dhb object (using the default parameters).
# NOTE(review): this rebinds the module-level `dhb` and `df_sample` created
# earlier in the file.
dhb = dhb_obj.dhb(features=None, sql=None, start_time_period=None, end_time_period=None,passdue_day=15)

# Extract the samples.
df_sample = dhb.dhb_features_extract()

# Back up df_sample. The content is CSV text, so the file gets a .csv
# extension (it was previously written under a misleading .xlsx name).
df_sample.to_csv(str(datetime.date.today())+"dhb_samples.csv")




# dhb data processing
# report sequence































