import pandas as pd
import numpy as np
import datetime
from data.analyis import filetool
from data.analyis import datacal
from models_kit import lightgbm
from matplotlib import pyplot as plt
from graph import drawplot


def _append_chart(document, chart, caption, image_path):
    """Save *chart* to *image_path* and embed it in *document* below *caption*."""
    chart.savefig(image_path)
    document.add_paragraph(caption)
    document.add_picture(image_path)
    # Release the figure once it has been embedded so figures do not pile up
    # across the per-feature loop. Guarded because the drawing helpers' return
    # type is not visible here (only that it exposes ``savefig``).
    if isinstance(chart, plt.Figure):
        plt.close(chart)


def report(dftrain, dftest, features, label, path, filename, kfold=10):
    """Train, tune and document a model, writing the result as a report file.

    The function fits an initial classifier, runs five successive
    grid-search stages through ``tun_params`` (tree shape, gamma, sampling
    ratios, ``reg_alpha``, ``reg_lambda``), then appends feature-importance,
    lift, univariate and PDP charts to the document and saves it.

    The original note on this function states that both ``dftrain`` and
    ``dftest`` must contain the fields ``applied_at``, ``applied_channel``
    and ``applied_type``; the code below reads them from ``dftest``.

    :param dftrain: training data frame.
    :param dftest: test data frame.
    :param features: feature column names handed to the model helpers.
    :param label: name of the target column.
    :param path: location passed to ``filetool.buildDocument`` /
        ``filetool.saveDocument``.
    :param filename: report file name passed to the same helpers.
    :param kfold: fold count forwarded to model fitting and grid search.
    :return: None; the report is persisted via ``filetool.saveDocument``.
    """
    import os
    import tempfile

    document = filetool.buildDocument(path, filename)
    # NOTE(review): the heading and ``get_xgb_params`` mention xgboost while the
    # model comes from ``models_kit.lightgbm`` -- confirm which is intended.
    document.add_heading('xgboost 算法运行报告')
    clf = lightgbm.buildClf()
    document.add_paragraph('初始化参数运行{}'.format(clf.get_xgb_params()))
    clf = lightgbm.modelfit(clf, dftrain, features, label, kfold=kfold)
    document.add_paragraph('模型训练集{}'.format(lightgbm.auc(clf, dftrain, features, label)))
    document.add_paragraph('模型测试集{}'.format(lightgbm.auc(clf, dftest, features, label)))

    document.add_heading('调整参数')
    # Each grid is searched in turn; the best estimator of one stage is the
    # starting point of the next.
    tuning_stages = [
        {'max_depth': [2, 3], 'min_child_weight': range(1, 4, 1)},
        {'gamma': [i / 10 for i in range(0, 5)]},
        {'subsample': [0.8, 0.9, 1], 'colsample_bytree': [0.8, 0.9, 1]},
        {'reg_alpha': [0.001, 0.01, 0.1, 1, 10]},
        {'reg_lambda': [0.001, 0.01, 0.1, 1, 10]},
    ]
    for grid in tuning_stages:
        document, clf = tun_params(document, clf, dftrain, dftest, grid,
                                   features, label, kfold=kfold)

    # Final model report: feature weights, lift charts, univariate and PDP charts.
    # The charts below read a 'predict_proba' column -- presumably added by
    # ``lightgbm.predict``; verify against that helper.
    dftrain = lightgbm.predict(clf, dftrain, features)
    dftest = lightgbm.predict(clf, dftest, features)
    featureimp = lightgbm.featureImportance(clf, features)

    # Charts are rendered into a private temporary directory instead of a fixed
    # 'tmp.png' in the working directory, so parallel runs cannot overwrite each
    # other's image and no stray file is left behind. The directory is kept
    # alive until the document has been saved in case images are read lazily.
    with tempfile.TemporaryDirectory() as workdir:
        image_path = os.path.join(workdir, 'tmp.png')

        # Feature weights (top 10).
        _append_chart(
            document,
            drawplot.draw_barplot(featureimp.head(10), 'feature', 'weight',
                                  title='Feature importance'),
            '特征权重图，近前10个特征',
            image_path,
        )

        # Model score vs. overdue rate, train and test side by side.
        dftrain['flag'] = '训练集'
        dftest['flag'] = '测试集'
        _append_chart(
            document,
            drawplot.liftchart(pd.concat([dftrain, dftest]), 'predict_proba', label,
                               bin=10, classes='flag', title='liftchart',
                               xlabel='模型分', ylabel='逾期率'),
            '整体--liftchart',
            image_path,
        )

        # By application month -- test set only.
        dftest = datacal.cal_month(dftest, 'applied_at', 'applied_month')
        _append_chart(
            document,
            drawplot.liftchart(dftest, 'predict_proba', label, bin=10,
                               classes='applied_month', title='分月liftchart',
                               xlabel='模型分', ylabel='逾期率'),
            '测试集分月--liftchart',
            image_path,
        )

        # By user type -- test set only.
        _append_chart(
            document,
            drawplot.liftchart(dftest, 'predict_proba', label, bin=10,
                               classes='applied_type', title='分用户类型liftchart',
                               xlabel='模型分', ylabel='逾期率'),
            '测试集分用户类型--liftchart',
            image_path,
        )

        # By channel -- restricted to the five most frequent channels.
        channels = dftest.applied_channel.value_counts()[:5].index
        _append_chart(
            document,
            drawplot.liftchart(dftest[dftest.applied_channel.isin(channels)],
                               'predict_proba', label, bin=10,
                               classes='applied_channel', title='分渠道liftchart',
                               xlabel='模型分', ylabel='逾期率'),
            '测试集分渠道--liftchart',
            image_path,
        )

        # Univariate chart and PDP chart for every feature in the importance table.
        for feature_name in featureimp.feature.tolist():
            _append_chart(
                document,
                drawplot.univarchart(dftest, feature_name, label, bin=10,
                                     title='单变量%s' % feature_name,
                                     ylabel='逾期率'),
                '单变量%s' % feature_name,
                image_path,
            )
            _append_chart(
                document,
                drawplot.pdpchart(dftest, feature_name, 'predict_proba', bin=10,
                                  title='pdp %s' % feature_name,
                                  ylabel='模型分'),
                'pdp %s' % feature_name,
                image_path,
            )

        filetool.saveDocument(document, path, filename)



def tun_params(document, clf, dftrain, dftest, params, features, label, kfold=10):
    """Run one grid-search stage and record its outcome in the report.

    :param document: report document; paragraphs are appended to it.
    :param clf: estimator used as the starting point of the search.
    :param dftrain: training data frame used for the search and scoring.
    :param dftest: test data frame used for scoring only.
    :param params: mapping of parameter name to the candidate values to try.
    :param features: feature column names handed to the model helpers.
    :param label: name of the target column.
    :param kfold: fold count forwarded to ``lightgbm.automodelfit``.
    :return: ``(document, best_estimator)`` where ``best_estimator`` is the
        search result's ``best_estimator_``.
    """
    write = document.add_paragraph

    # List every parameter being tuned together with its candidate values.
    for param_name, candidates in dict(params).items():
        write('调参{},取值{}'.format(param_name, candidates))

    search = lightgbm.automodelfit(clf, params, dftrain, features, label, kfold=kfold)
    best_clf = search.best_estimator_

    write('模型训练参数{}'.format(best_clf.get_xgb_params()))
    write('寻找最优参数过程{}'.format(search.cv_results_))
    write('最优参数{},最优分{}'.format(search.best_params_, search.best_score_))

    # Scores are computed through the search object itself, train set first.
    for template, frame in (('模型训练集{}', dftrain), ('模型测试集{}', dftest)):
        write(template.format(lightgbm.auc(search, frame, features, label)))

    return document, best_clf



