import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os,psutil
from tools import datacal
from graph import matplot
# Default LightGBM parameter set; used as the default argument of buildClf.
# NOTE(review): 'logloss' may not be a recognised LightGBM metric name (the
# documented binary log-loss metric is 'binary_logloss') -- confirm.
params_lgb = dict(
    task='train',  # run mode
    application='binary',  # binary classification
    boosting_type='gbdt',  # boosting algorithm
    num_boost_round=150,  # number of boosting iterations
    learning_rate=0.01,  # learning rate
    metric={'logloss', 'auc'},  # evaluation metrics
    early_stopping_rounds=None,
    # objective='regression',  # objective function (disabled)
    max_depth=4,
    num_leaves=20,  # number of leaves per tree
    feature_fraction=0.9,  # fraction of features sampled when building a tree
    bagging_fraction=0.8,  # fraction of samples drawn when building a tree
    bagging_freq=5,  # k means bagging is performed every k iterations
    verbose=1,  # <0 fatal only, =0 errors (warnings), >0 info
)


def returnAUC(clf, training_set, validation_set, features, target='target'):
    '''
    instructions : print and return the ROC AUC of a fitted model on the
                   training set and on the validation set

    Parameters :
        clf - fitted model; only its ``predict`` method is called
        training_set - training data, indexed by ``features`` and ``target``
        validation_set - validation data, indexed the same way
        features - selector for the feature columns passed to ``clf.predict``
        target - key of the label column in both datasets

    Returns :
        (train_auc, val_auc)
    '''
    # Score both datasets the same way, training set first.
    scores = [
        roc_auc_score(subset[target], clf.predict(subset[features]))
        for subset in (training_set, validation_set)
    ]
    train_auc, val_auc = scores
    print('training set AUC : ', train_auc)
    print('validation set AUC : ', val_auc)
    return train_auc, val_auc


def train_lgbm(params, df_train, df_val, features, adds_on=None, target='target'):
    '''
    instructions : train a LightGBM model with the specified params and
                   report its AUC on the training and validation sets

    Parameters :
        params - default params (dict); copied, never modified in place
        df_train - training set
        df_val - validation set
        features - feature list of the dataset
        adds_on - optional dict of parameters that override ``params``
        target - target column name used for both training and scoring

    Returns :
        (train_auc, val_auc, lgbm) where ``lgbm`` is the object returned by
        ``lgb.train``
    '''
    # Work on a copy so the caller's dict is left untouched.
    params = params.copy()
    print(type(df_train), type(df_val))

    # Overlay the caller-supplied overrides on top of the defaults.
    if adds_on is not None:
        params.update(adds_on)

    # Wrap the frames in LightGBM datasets; the validation set references the
    # training set.
    lgb_train = lgb.Dataset(df_train[features], df_train[target])
    lgb_val = lgb.Dataset(df_val[features], df_val[target], reference=lgb_train)

    # NOTE(review): `verbose_eval` is no longer accepted by lgb.train in
    # LightGBM >= 4.0 -- confirm the pinned LightGBM version.
    lgbm = lgb.train(params, lgb_train, valid_sets=lgb_val, verbose_eval=False)

    # Score with the same label column that was used for training; previously
    # the default 'target' column was always used here.
    train_auc, val_auc = returnAUC(lgbm, df_train, df_val, features, target=target)
    return train_auc, val_auc, lgbm


def lgb_params_tuning(params, features, train, val, target='target', topN=3, cv_fold=5):
    '''
    instructions : grid-search ``max_depth`` / ``num_leaves`` with lgb.cv and
                   keep the topN combinations by best mean CV AUC

    Parameters :
        params - default parameters (dict format); copied, never modified.
                 lgb.cv must report an 'auc-mean' entry for these params
        features - features list
        train - training set (the only data used for cross-validation)
        val - validation set; currently unused, kept for interface
              compatibility
        target - target label column
        topN - number of best parameter combinations to keep
        cv_fold - number of CV folds

    Returns :
        (optimal_para, scores) - two lists of length topN; scores[i] is the
        best mean CV AUC obtained with optimal_para[i]. Slots that were never
        filled keep the placeholder 0.0 in both lists.
    '''
    # Work on a copy so the caller's dict is left untouched.
    params = params.copy()
    lgb_train = lgb.Dataset(train[features], train[target])

    # Best scores found so far and, index-aligned, their parameter dicts.
    topn = np.zeros(topN)
    optimal_para = list(topn)

    # Report memory usage before starting the search.
    print('Memory Occupancy Rate: ' + str(psutil.virtual_memory().percent) + '%')

    for deepth in range(2, 4):
        for leaves in range(2, 2 ** deepth, 4):
            params['max_depth'] = deepth
            params['num_leaves'] = leaves
            print("parameter combination : ", 'max_depth ', deepth, 'num_leaves ', leaves)
            cv_result = lgb.cv(params, lgb_train, seed=7, nfold=cv_fold, verbose_eval=30)

            # Best mean AUC over all boosting rounds of this combination.
            auc_score = pd.Series(cv_result['auc-mean']).max()
            print('auc ', auc_score)

            # Replace the current worst entry if this combination beats any
            # stored score. The slot index is computed once, before the score
            # is overwritten, so the score and its parameters stay aligned.
            if (auc_score > topn).any():
                worst = topn.argmin()
                topn[worst] = auc_score
                optimal_para[worst] = {'max_depth': deepth, 'num_leaves': leaves}
    return optimal_para, list(topn)


#        training_curve.append(train_auc)
#        validation_curve.append(val_auc)

# auc_matrix = pd.concat([pd.Series(training_curve),pd.Series(validation_curve)],index=['trainingAUC','validationAUC'],axis=1)
#    print(auc_matrix)
#
#    plt.plot(candidate_list, training_curve,label='training')
#    plt.plot(candidate_list, validation_curve,label='validation')
#    plt.legend()
#    plt.show()
#
#    return validation_curve[:3]


# Pending: the function below has not been tested yet.
# def lightGBM_gridCV(param_validation, params=params_lgb):
#     # make sure that memory can afford
#     print('Memory Occupancy Rate: ' + (str)(psutil.virtual_memory().percent) + '%')
#
#     param_test = {
#         'max_depth': np.arange(2, 7, 1),
#         'num_leaves': np.arange(20, 200, 10),
#     }
#     estimator = LGBMRegressor(
#         num_leaves=50,
#         max_depth=13,
#         learning_rate=0.1,
#         n_estimators=1000,
#         objective='binary',
#         min_child_weight=1,
#         param['metric'] = ['auc', 'binary_logloss'],
#         subsample = 0.8,
#         colsample_bytree = 0.8,
#         nthread = 7
#     )
#     gsearch = GridSearchCV(estimator, param_grid=param_test, scoring='roc_auc', cv=5)
#     gsearch.fit(values, labels)
#     gsearch.grid_scores_, gsearch.best_params_, gsearch.best_score_
#     return 1


def predict(lgbm, df_test, features, target='target'):
    '''
    instructions : score a test set with a fitted model and compute its AUC

    Parameters :
        lgbm - fitted model; only its ``predict`` method is called
        df_test - test data, indexed by ``features`` and ``target``
        features - selector for the feature columns passed to ``lgbm.predict``
        target - key of the label column in ``df_test``

    Returns :
        (predictions, auc)
    '''
    scores = lgbm.predict(df_test[features])
    return scores, roc_auc_score(df_test[target], scores)



def buildClf(params=params_lgb):
    '''
    instructions : build a lgb classifier

    Params :
        params - dict of keyword arguments for lgb.LGBMClassifier
                 (defaults to the module-level params_lgb)

    Returns :
        an unfitted lgb.LGBMClassifier
    '''
    # Unpack the dict into keyword arguments; passing it positionally would
    # bind the whole dict to the constructor's first parameter instead.
    return lgb.LGBMClassifier(**params)


def automodelfit(clf, param_grid, dftrain, features, resp, kfold=10, scoring='roc_auc'):
    '''
    instructions : run an exhaustive grid search over ``param_grid`` and
                   refit the best estimator on the whole training set

    Parameters :
        clf - estimator to tune
        param_grid - parameter grid passed to GridSearchCV
        dftrain - training set
        features - feature columns of ``dftrain``
        resp - response (label) column of ``dftrain``
        kfold - value passed as GridSearchCV's ``cv`` argument
        scoring - scoring passed to GridSearchCV

    Returns :
        the fitted GridSearchCV object
    '''
    # `iid` is intentionally not passed: it was deprecated in scikit-learn
    # 0.22 and removed in 0.24, where passing it raises TypeError.
    grid_search = GridSearchCV(
        clf,
        param_grid,
        scoring=scoring,
        n_jobs=-1,
        cv=kfold,
        verbose=2,
        refit=True,
    )
    # Train the models.
    grid_search.fit(dftrain[features], dftrain[resp])
    # The best parameters are available on the returned object.
    return grid_search


##############################################################################




