# -*- coding: utf-8 -*-
"""
Created on Mon Nov 26 21:44:56 2018

@author: Jason Wang
"""
import time
import pymysql
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from matplotlib.lines import Line2D
import datetime

# Channel ids (values of applied_from) that get a per-channel breakdown chart.
kalist = [
    1, 198, 214, 217, 333,
    159507, 159384, 159563, 159561, 159538, 159609, 159537,
]

############################## SQL ##############################################
# Earlier hard-coded channel list and id -> name mapping, kept for reference only:
# applied_channel = [1,214,217,198,159384,159483,159479,159478,333,158748,158764,158932,159457,159459,159519,159507,159538,159561]
# applice_type = []
# channelDict = {159384:'Ping An H5 high net worth',159483:'Ping An low net worth',159479:'auto-insurance policy loan',159478:'corporate loan',333:'Rong360',158748:'Autohome',158764:'BestPay',158932:'Lakala',159457:'Huijinsuo',159459:'Huijinsuo',159519:'Yirong Puhui'}

# Each applied_type filter paired with the label used for it in chart titles.
# The first entry stands for "all types together".
_APPLIED_TYPE_LABELS = [
    ('1,2,3', '客群总体'),  # overall population
    ('1', '首申'),  # first application
    ('2', '复申'),  # re-application
    ('3', '复贷'),  # repeat loan
]
appliedTypeList = [code for code, _ in _APPLIED_TYPE_LABELS]
appliedType_type = [label for _, label in _APPLIED_TYPE_LABELS]

# Channels whose transacted loans that started during the previous calendar
# month sum to more than 100000 in real_loan_amount. applied_from 159481,
# 159486 and 159528 are excluded (the original note calls them recall channels).
sql_channel = """
SELECT DISTINCT(applied_from),applied_channel
FROM risk_analysis
WHERE applied_from IN
(SELECT applied_from FROM risk_analysis
WHERE transacted = 1
AND loan_start_date >= DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -1 MONTH),'%Y-%m-01') 
AND loan_start_date < DATE_FORMAT(NOW(),'%Y-%m-01')
and applied_from not in (159481,159486,159528)
GROUP BY 1
HAVING SUM(real_loan_amount) > 100000
ORDER BY sum(real_loan_amount) DESC)
"""

# Query template for one feature over the previous two calendar months.
# "@feature" and "@applied_channel" are placeholders filled in with
# str.replace before the statement is executed.
sql = """
SELECT date_format(applied_at,'%Y-%m-%d') as applied_at,applied_from,applied_type,@feature FROM risk_analysis
WHERE DATE_FORMAT(applied_at,'%Y-%m') BETWEEN DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -2 MONTH),'%Y-%m')
AND DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -1 MONTH),'%Y-%m')
AND applied_from in (@applied_channel)
AND applied = 1
"""

########################## DB Configuration #####################################
def loadDBConfig(env=None):
    """Return the connection settings for the risk_analysis database.

    Each setting can be overridden with an environment variable
    (RISK_ANALYSIS_DB_USER, _PASSWORD, _HOST, _PORT, _NAME, _ENCODING).
    When a variable is absent the historical built-in value is used, so
    existing deployments keep working unchanged.

    :param env: mapping to read overrides from; defaults to ``os.environ``.
    :return: dict with keys user, password, host, port, database, encoding.
    :raises ValueError: if the port override is not an integer.
    """
    env = os.environ if env is None else env
    return {
        'user': env.get('RISK_ANALYSIS_DB_USER', 'fengkong_read_only'),
        # NOTE(review): a credential committed to source control should be
        # rotated and supplied only through the environment; the literal
        # fallback is kept purely for backward compatibility.
        'password': env.get('RISK_ANALYSIS_DB_PASSWORD', 'mT2HFUgI'),
        'host': env.get('RISK_ANALYSIS_DB_HOST', '172.20.6.9'),
        # Environment values are strings, the driver needs an int.
        'port': int(env.get('RISK_ANALYSIS_DB_PORT', 9030)),
        'database': env.get('RISK_ANALYSIS_DB_NAME', 'risk_analysis'),
        'encoding': env.get('RISK_ANALYSIS_DB_ENCODING', 'utf8'),
    }


risk_analysis_config = loadDBConfig()
#################################################################################

# Working directory at start-up and today's date as YYYY-MM-DD.
pwd = os.getcwd()
now = time.strftime("%Y-%m-%d")

# Output folders. Every value ends with a backslash because file names are
# appended to it by plain string concatenation.
_PLOT_ROOT = "E:\\Python\\su Project\\plot\\"
path = _PLOT_ROOT + "VLM\\"
path_alarm = path + "alarm\\"
path_sepatate = _PLOT_ROOT + "separateByChannel\\"


# make directory, if it exists return path, else return created folder path

# def mkdir(path,name):
#    folder = os.path.exists(path+name)
#    if folder:
#        return path+name+'\\'
#    else:
#        os.makedirs(path+name)
#        return path+name+'\\'

# VLM with one variable
def _drawControlLine(axs, x, level, label):
    """Draw a dashed horizontal line at *level* and annotate it at the right edge."""
    axs.add_line(Line2D((x[0], x[-1]), (level, level), linestyle='--', color='lightcoral'))
    # The annotation text is passed positionally: its keyword name differs
    # between Matplotlib releases.
    plt.annotate('{}\n{}'.format(label, round(level, 2)), xy=(x[-1] + 0.1, level))


# VLM with one variable
def plotLine(title, y, row, col, table, save_path, upperBoundary=0, bottomBoundary=0):
    """Plot one series as a control chart with a data table and save it as PNG.

    The chart shows the series, its mean, an upper and a lower control line and
    a table underneath. Nothing is drawn when there are 10 or fewer points or
    when the integer part of the series total is 10 or less.

    :param title: chart title; also used as the file name (``title + ".png"``).
    :param y: pandas Series of values to plot (uses sum/mean/std/min).
    :param row: row labels of the table.
    :param col: index-like of date strings; the first five characters of each
        entry are dropped to form the table's column labels.
    :param table: cell contents of the table, one sequence per row.
    :param save_path: folder prefix, concatenated directly with the file name.
    :param upperBoundary: upper control level; 0 means mean + 3 * std.
    :param bottomBoundary: lower control level; 0 means mean - 3 * std.
    :return: always 1.
    """
    # Skip series that are too short or too small to chart. ``or`` is required
    # here: ``a <= 10 | b <= 10`` would parse as the chained comparison
    # ``a <= (10 | b) <= 10`` because ``|`` binds tighter than ``<=``.
    if len(col) <= 10 or int(y.sum()) <= 10:
        return 1

    cols = [item[5:] for item in col.values.tolist()]
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams['savefig.dpi'] = 226  # resolution of the saved image
    fig, axs = plt.subplots(1, 1, figsize=(33, 11), linewidth=0.1)

    x = range(len(col))
    mean = y.mean()
    three_std = 3 * y.std()

    axs.plot(x, y)
    axs.add_line(Line2D((x[0], x[-1]), (mean, mean), linestyle='--', color='darkorange'))
    plt.annotate('月均{}'.format(round(mean, 2)), xy=(x[-1] + 0.1, mean))

    # A boundary of 0 means "not supplied": fall back to mean +/- 3 std.
    upper = mean + three_std if upperBoundary == 0 else upperBoundary
    lower = mean - three_std if bottomBoundary == 0 else bottomBoundary
    _drawControlLine(axs, x, upper, 'Mean+3STD')
    _drawControlLine(axs, x, lower, 'Mean-3STD')

    # Drop a vertical guide from every point. The guides start at 0 unless the
    # whole band lies clearly above zero, in which case they start just below it.
    bottom = 0
    if y.min() - y.std() * 3 - y.mean() * 0.02 > 0:
        bottom = y.min() - y.std() * 3 - y.std() * 0.1
    plt.vlines(x, [bottom], y, color='lightgrey', linestyle='--')
    axs.grid()
    plt.xticks([])  # the table's column labels replace the x tick labels

    the_table = plt.table(cellText=table,
                          rowLabels=row,
                          colLabels=cols,
                          colWidths=[0.91 / (len(col) - 1)] * len(col),
                          loc='bottom')
    the_table.auto_set_font_size(False)
    the_table.set_fontsize(9)
    fig.subplots_adjust(left=0.032, right=0.97)
    fig.set_size_inches(33, 11)

    plt.title(title, fontsize=18)
    plt.savefig(save_path + title + ".png")
    plt.show()
    # Release the figure; the script creates one per feature and applied type.
    plt.close(fig)
    return 1


def readExcel(path, sheet=None):
    """Load an Excel workbook with pandas.

    :param path: location of the workbook.
    :param sheet: forwarded as the second positional argument of
        ``pd.read_excel`` (the sheet selector); defaults to None.
    :return: whatever ``pd.read_excel`` returns for that selector.
    """
    workbook = pd.read_excel(path, sheet)
    return workbook


# conn = connect2DB()
# Accumulators for feature keys and descriptions; nothing below fills them.
dict_keylist, dict_vallist = [], []

# Data dictionary workbook. readExcel is called without a sheet selector, and
# the result is used as a mapping (presumably sheet name -> DataFrame, as
# pandas documents for sheet_name=None).
dict_DD = readExcel("E:\\Python\\su Project\\features_DD.xlsx")

# One entry per key of the data dictionary, in the order the keys are stored.
modelList = list(dict_DD.keys())


def mkdir(path, fd):
    """Make sure the folder ``path + fd`` exists and return it.

    Missing parent folders are created as well. Calling this for a folder
    that already exists is not an error.

    :param path: parent location; joined to *fd* by plain concatenation, so it
        must already end with a path separator.
    :param fd: name of the folder to create inside *path*.
    :return: ``path + fd``.
    """
    folder = path + fd
    os.makedirs(folder, exist_ok=True)
    return folder


def connect2DB(db_config):
    """Open a new pymysql connection from a settings mapping.

    :param db_config: mapping with the keys host, port, user, password,
        database and encoding.
    :return: the connection object returned by ``pymysql.connect``.
    """
    connect_kwargs = {
        'host': db_config['host'],
        'port': db_config['port'],
        'user': db_config['user'],
        'passwd': db_config['password'],
        'db': db_config['database'],
        'charset': db_config['encoding'],
    }
    return pymysql.connect(**connect_kwargs)


def query_sql(sql, db_config=risk_analysis_config):
    """Run *sql* and return the result as a DataFrame.

    :param sql: the statement to execute.
    :param db_config: connection settings passed to ``connect2DB``.
    :return: the DataFrame produced by ``pd.read_sql``, or the integer 0 when
        connecting or querying fails. The 0 sentinel is kept because callers
        rely on it; the cause of the failure is printed instead of being lost.
    """
    conn = None
    try:
        conn = connect2DB(db_config)
        return pd.read_sql(sql, conn)
    except Exception as e:
        print('query_sql Exception : ', e)
        return 0
    finally:
        # Always release the connection, including when the query raised.
        if conn is not None:
            try:
                conn.close()
            except Exception as close_error:
                # A failed close must not mask the query result or its error.
                print('query_sql close Exception : ', close_error)


def dataManipul(df, keyword):
    """Compute per-date quality rates and trimmed summary statistics for one column.

    :param df: DataFrame with an 'applied_at' column and the *keyword* column.
        The final column rename expects exactly three statistic columns, so
        *df* is assumed to contain only those two columns (the callers in this
        file pass ``df[['applied_at', key]]``).
    :param keyword: name of the value column to analyse.
    :return: a 4-tuple ``(zero_rate, missing_rate, cols, df_sum)``:
        ``zero_rate`` is the percentage of non-null values equal to 0 per date,
        ``missing_rate`` is the percentage of null values per date (both
        rounded to one decimal, NaN replaced by 0), ``cols`` is the index of
        dates, and ``df_sum`` is a DataFrame with columns 'mean', 'std' and
        'count' per date, computed after trimming.
    """
    # Earlier trimming experiments, kept for reference:
    # df_withoutna = df.dropna(axis=0).sort_values(by=keyword,ascending=False).reset_index().drop('index',axis=1)
    # df = pd.merge(df_withoutna[keyword].iloc[int(len(df_withoutna)*0.01):int(len(df_withoutna)*0.99)]

    # df.dropna(axis=0).loc[:,keyword] = df.dropna(axis=0)[keyword].map(lambda x : np.nan if x < 0 else x )

    # Number of non-null values per date (count() ignores NaN).
    df_count = df[['applied_at', keyword]].groupby('applied_at').count()[keyword]  # original author flagged this line for re-checking

    # Zero-filled, unnamed Series on the same date index. In the concat calls
    # below it becomes column 0 and only serves to keep every date present.
    df_zeros = pd.Series(np.zeros(df_count.shape), index=df_count.index)
    # Rows whose value is null; fillna(0) makes them countable by count().
    df_missing = df[df[keyword].isnull()].fillna(0).groupby('applied_at')[keyword].count()
    # Dates without any null row get a missing count of 0.
    df_missing = pd.concat([df_zeros, df_missing], axis=1, sort=True).fillna(0)[keyword]

    # Earlier way of aligning the missing counts, kept for reference:
    #    df_shape = pd.DataFrame(np.zeros(df_count.shape))
    #
    #    df_missing = df[df[keyword].isnull()].fillna(0).groupby('applied_at')[keyword].count()
    #    df_missing = df_shape + df_missing
    # Share of null values among all rows of the date, in percent.
    missing_rate = df_missing / (df_count + df_missing) * 100
    del df_missing

    # Share of non-null values that are exactly 0, in percent.
    df_zero = df[df[keyword] == 0].groupby('applied_at')[keyword].count()
    df_zero = pd.concat([df_zeros, df_zero], axis=1, sort=True).fillna(0)[keyword]
    zero_rate = df_zero / df_count * 100
    del df_zero

    # Drop rows with a NaN in any column, then order by value, largest first.
    df_noneNA = df.dropna(axis=0)
    df_noneNA = df_noneNA.sort_values(by=keyword, ascending=False).reset_index().drop('index', axis=1)

    # Trim the largest 1% and the smallest 1% of rows across the whole sample
    # (not per date), then aggregate what is left by date.
    df_sum = df_noneNA.iloc[int(len(df_noneNA) * 0.01):int(len(df_noneNA) * 0.99)].groupby('applied_at').agg(
        ['mean', 'std', 'count'])
    # Re-align on the full date index (dates with no surviving rows become 0)
    # and discard the placeholder column 0.
    df_sum = pd.concat([df_zeros, df_sum], axis=1, sort=True).fillna(0).drop(columns=[0])
    # Flatten the aggregated column labels to plain names.
    df_sum.columns = ['mean', 'std', 'count']
    cols = df_count.index
    return zero_rate.fillna(0).round(1), missing_rate.fillna(0).round(1), cols, df_sum


#########################################################################
# check via channel details
def separateBychannel(df, key, meansub3std, meanpls3std):
    """Chart the daily mean of *key* for each channel in ``kalist`` that breaches the limits.

    For every channel id the rows with that ``applied_from`` are summarised
    with ``dataManipul``. A chart is written to ``path_sepatate`` when, within
    the last 30 daily means, the maximum exceeds *meanpls3std* or the minimum
    falls below *meansub3std*.

    The chart title is built from module-level loop state (``modelList``,
    ``i``, ``description``, ``fea_i``, ``appliedType_type`` and
    ``appliedType_index``), so this function only works while the main loop
    of the script is running.

    :param df: DataFrame with 'applied_at', 'applied_from' and *key* columns.
    :param key: name of the feature column.
    :param meansub3std: lower limit for the daily mean.
    :param meanpls3std: upper limit for the daily mean.
    :return: None. A ValueError for one channel skips that channel; any other
        exception stops the loop and is printed.
    """
    row_labels = ['value', 'count', 'Missing Rate', 'Zero Rate']
    try:
        for channel_id in kalist:
            try:
                channel_rows = df[df.applied_from == channel_id]
                zero_pct, missing_pct, dates, stats = dataManipul(channel_rows[['applied_at', key]], key)

                daily_mean = stats['mean']
                cells = [
                    stats['mean'].round(1),
                    stats['count'].astype('int'),
                    missing_pct.astype('str') + '%',
                    zero_pct.astype('str') + '%',
                ]

                last_month = daily_mean.iloc[-30:]
                too_high = last_month.max() > meanpls3std
                too_low = last_month.min() < meansub3std
                if not (too_high | too_low):
                    continue

                heading = (str(modelList[i]) + '-' + description[fea_i] + '-Mean-'
                           + appliedType_type[appliedType_index] + 'with' + str(channel_id) + '-VLM')
                plotLine(heading, daily_mean, row_labels, dates, cells, path_sepatate)
            except ValueError:
                # This channel cannot be summarised; move on to the next one.
                continue
    except Exception as e:
        print('channel Exception : ', key, appliedType, e)


########### extract channel list #############

# Ids of the channels selected by sql_channel.
channel_frame = query_sql(sql_channel)
applied_channel = channel_frame.applied_from.tolist()

# str() of a list renders as "[a, b, c]"; without the brackets it is the
# comma-separated id list that replaces the placeholder inside IN (...).
channel_csv = str(applied_channel).strip('[').strip(']')
sql = sql.replace('@applied_channel', channel_csv)

#########################################################################
# for model in modelList:
#    df_model = dict_DD[model].dropna(axis = 0)
#    dict_keylist.append(df_model.feature.tolist())
#    dict_keylist.append(df_model.query.tolist())
#    dict_vallist.append(df_model.description.tolist())
#
# for li in dict_keylist:
# Row labels of the statistics table drawn under every chart.
row_labels = ['value', 'count', 'Missing Rate', 'Zero Rate']

# NOTE: separateBychannel reads the loop variables i, fea_i, appliedType,
# appliedType_index and description as module globals; keep these names.
for i, model_name in enumerate(modelList):

    # Keep only the data-dictionary rows that have no empty cell.
    df_model_list = dict_DD[model_name].dropna(axis=0)
    model_rows = df_model_list.reset_index()
    # feature key list
    features = model_rows.feature
    # query key list
    queries = model_rows.queries
    # feature descriptions list
    description = model_rows.description

    for fea_i in range(len(features)):
        try:
            key = queries[fea_i].strip()
            print('key: ', key)
            df = query_sql(sql.replace('@feature', queries[fea_i]))
            if not isinstance(df, pd.DataFrame):
                # query_sql reports a failed query with the integer 0.
                print('query failed, feature skipped: ', key)
                continue
            # Treat SQL NULLs and negative values as missing.
            df.loc[:, key] = df.loc[:, key].map(lambda x: np.nan if x is None else x)
            df.loc[:, key] = df.loc[:, key].map(lambda x: np.nan if x < 0 else x)
        except Exception as a:
            print(a)
            continue

        for appliedType_index, appliedType in enumerate(appliedTypeList):
            print('appliedType', appliedType)
            if appliedType_index == 0:
                # First entry means "all applied types together".
                df_tmp = df[['applied_at', 'applied_from', key]]
            else:
                df_tmp = df[df.applied_type == int(appliedType)][['applied_at', 'applied_from', key]]

            # ---- daily mean chart ----
            try:
                # Computed once and reused for the std chart below.
                zero_rate_total, missing_rate_total, cols_total, df_sum = dataManipul(df_tmp[['applied_at', key]], key)
                missing_cells = missing_rate_total.astype('str') + '%'
                zero_cells = zero_rate_total.astype('str') + '%'

                y_total = df_sum['mean']
                table = [df_sum['mean'].round(1), df_sum['count'].astype('int'), missing_cells, zero_cells]

                meanpls3std = y_total.mean() + y_total.std() * 3
                meansub3std = y_total.mean() - y_total.std() * 3

                recent = y_total.iloc[-30:]
                breached = (recent.max() > meanpls3std) | (recent.min() < meansub3std)

                title = (str(modelList[i]) + '-' + description[fea_i] + '-Mean-'
                         + appliedType_type[appliedType_index] + '-变化VLM')
                # Charts that breach the limits go to the alarm folder.
                plotLine(title, y_total, row_labels, cols_total, table, path_alarm if breached else path)
                if breached:
                    separateBychannel(df_tmp, key, meansub3std, meanpls3std)
            except Exception as e:  # ValueError
                print('Mean Exception : ', key, appliedType, e)
                continue

            # ---- daily std chart ----
            try:
                y_total = df_sum['std']
                table = [df_sum['std'].round(1), df_sum['count'], missing_cells, zero_cells]

                stdpls3std = y_total.mean() + y_total.std() * 3
                stdsub3std = y_total.mean() - y_total.std() * 3

                # NOTE(review): this window leaves out the latest date
                # ([-30:-1]) while the mean check above uses [-30:]; kept as
                # found, confirm which one is intended.
                recent = y_total.iloc[-30:-1]
                breached = (recent.max() > stdpls3std) | (recent.min() < stdsub3std)

                title = (str(modelList[i]) + '-' + description[fea_i] + '-Std-'
                         + appliedType_type[appliedType_index] + '-变化VLM')
                plotLine(title, y_total, row_labels, cols_total, table, path_alarm if breached else path)
            except Exception as e:
                print('Std Exception : ', e)
















