from models_obj import dhb_obj
from tools import datacal
import datetime
from models_kit import lightgbm
from models_kit import xgboost
import lightgbm as lgb
from graph import matplot
from tools import filetool


# Model descriptor for the DHB product. Every selector is passed as None and
# only the overdue-day threshold is set explicitly (15).
# NOTE(review): presumably None makes dhb_obj fall back to its own defaults
# for features / sql / time window — confirm in dhb_obj.
dhb = dhb_obj.dhb(
    features=None,
    sql=None,
    start_time_period=None,
    end_time_period=None,
    passdue_day=15,
)

# Sample extraction.
# The extraction through the dhb object is currently disabled; the sample is
# loaded from a local CSV snapshot in the temporary section below instead.
#df_sample = dhb.dhb_features_extract()


######### temp #############
# Temporary: read a previously exported sample from a hard-coded local path.
# NOTE(review): engine='python' is presumably needed because of the non-ASCII
# characters in the file name — confirm before removing it.
import pandas as pd
df_sample = pd.read_csv('E:\\model\\model_mvp\\mvp\\dhb_loan_sample——2019-04-23.csv',engine='python')

############################


# Back up df_sample under a name prefixed with today's date (relative path,
# so it lands in the current working directory). The data is written with
# to_csv, so the file carries a .csv extension; the previous ".xlsx" name
# produced a CSV text file that spreadsheet tools reject as a workbook.
df_sample.to_csv(str(datetime.date.today())+"dhb_samples.csv")


# Default sample split into train / validation / test frames
# (val_size=0.2, test_size=0.2, stratified on 'target', random method,
# fixed random_state).
df_train, df_val, df_test = datacal.train_test_split_general(
    df_sample,
    val_size=0.2,
    test_size=0.2,
    stratify='target',
    random_state=7,
    split_methods='random',
    time_label='applied_at',
)
# The unsplit sample is not used again below.
del df_sample

# Note: `lightgbm` here is the project wrapper imported from models_kit; the
# LightGBM package itself is bound to `lgb`.
# Use cross-validation to obtain the optimal parameters (optimal_para) and
# topn, the list of best AUCs those parameters reached on the CV validation
# folds.
optimal_para, topn = lightgbm.lgb_params_tuning(
    lightgbm.params_lgb,
    dhb.features,
    df_train,
    df_val,
    target='target',
    topN=3,
    cv_fold=5,
)
print('topn 通过train交叉验证得到的auc ',topn)

# Train the model with the new parameters: adds_on is the dict of parameters
# that override the base params_lgb. Also outputs feature importance.
train_auc, val_auc, lgbm = lightgbm.train_lgbm(
    lightgbm.params_lgb,
    df_train,
    df_val,
    dhb.features,
    adds_on=optimal_para,
    target='target',
)

# Score the hold-out test set and keep the scores next to the rows; the
# 'predict' column is read by the report section further down.
predictions, test_auc = lightgbm.predict(lgbm, df_test, features=dhb.features)
df_test['predict'] = predictions




####### allocator cache ############
# Lookup tables from code strings to display labels. Neither table is
# referenced elsewhere in the visible part of this script.
# NOTE(review): keys look like comma-separated id lists — confirm the format
# expected by whatever consumes these tables.

# Application channel -> label.
applied_from = {
    '1,214,217,198': '内部',
    '333': '融360',
    '159537': '360金融',
}

# Application type -> label.
applied_type = {
    '1,2': '首贷',
    '1,2,3': '首付贷',
    '1': '首申',
    '2': '复申',
    '3': '复贷',
}
####################################


### report
# Builds the refit report: feature-importance plot first, then one table
# chart per feature for each of the univar / PDP / lift sections, and finally
# saves the document.

# Plot feature importance; the helper returns the location of the saved plot.
path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./plots/', topN=20)

# Report file location (hard-coded).
report_path = "E:\\bla\\"
report_name = "lgb_report.docx"

document = filetool.buildDocument(report_path, report_name)

document.add_heading('lightGBM 算法refit报告')

# Paragraphs are added on the document instance, the same object add_heading
# is called on above (previously this went through filetool.Document, i.e.
# not the document being built).
document.add_paragraph('特征权重图')

# NOTE(review): filetool.add_picture is not given the document — confirm the
# helper attaches the picture to the document built above.
filetool.add_picture(path)

document.add_paragraph('univar_chart')

# One entry per report section: (table function, data frame, score column,
# chart-title suffix). Sections are emitted in this order, each covering
# every feature before the next section starts.
chart_sections = (
    (datacal.cal_univar, df_train, 'raw_score', ' univar Chart'),
    (datacal.cal_pdp, df_test, 'predict', ' PDP Chart'),
    (datacal.cal_liftchart, df_test, 'predict', ' lift Chart'),
)

for compute_table, frame, score_column, title_suffix in chart_sections:
    for feature in dhb.features:
        # NOTE(review): the feature name is not passed to the table function,
        # so every iteration appears to compute the same table — confirm
        # against the datacal signatures.
        table = compute_table(frame, score=score_column)
        chart_title = feature + title_suffix
        matplot.plot_table(table, title=chart_title, saved_path='./plots/cache')
        # NOTE(review): no path separator or file extension is added between
        # "./plots/cache" and the title — confirm this matches the file name
        # plot_table actually writes.
        filetool.add_picture("./plots/cache" + chart_title)

filetool.saveDocument(document, report_path, report_name)














