#!/usr/bin/env python
# coding: utf-8

from datetime import datetime
import pandas as pd
pd.options.display.max_columns = 1000
import pymongo
import numpy as np
import time
import pymysql
import datetime
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from dateutil.relativedelta import relativedelta

# "KA" channel ids: the `applied_from` values that separateBychannel()
# iterates over when it breaks an alarm down per channel.
kalist = [
    1, 198, 214, 217, 333,
    159507, 159384, 159478, 459483,
    159563, 159561, 159538, 159609,
]

# Connection settings read by connect2DB() and used as the default target of
# query_sql().
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store.
risk_analysis_config = dict(
    user='fengkong_read_only',
    password='mT2HFUgI',
    host='172.20.6.9',
    port=9030,
    database='risk_analysis',
    encoding='utf8',
)

# read mongodb mapping from excel
#mapping_score = pd.read_excel("./mongodb.xlsx",sheet_name='score').dropna(axis=0)
#mapping_variable = pd.read_excel("./mongodb.xlsx",sheet_name='variable').dropna(axis=0)

def readExcel(path,sheet=None):
    """Read an Excel workbook through ``pandas.read_excel``.

    Args:
        path: Location of the workbook.
        sheet: Forwarded as ``sheet_name``. With the default ``None`` pandas
            loads every sheet and returns a dict keyed by sheet name.

    Returns:
        Whatever ``pandas.read_excel`` returns for the given ``sheet``.
    """
    workbook = pd.read_excel(path, sheet_name=sheet)
    return workbook

# Feature dictionary workbook: one sheet per model, loaded as
# {sheet name: DataFrame} because no sheet is specified.
dict_DD = readExcel("../features_mongodb.xlsx")
# Sheet names, in workbook order, drive the outer loop of the report.
modelList = list(dict_DD)


# MongoDB filter / projection templates. They are Python-literal dicts kept as
# strings; '@start_date', '@end_date' and '@key' are substituted before use.
limit = "{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
query = "{'order_id':1,'@key':1,'_id':0}"


def monthStart(months_back, today=None):
    """Return the first day of a month as a ``'YYYY-MM-01 00:00:00'`` string.

    Args:
        months_back: How many calendar months before ``today``'s month.
            ``0`` means the current month.
        today: Reference ``datetime.date``; defaults to the current local date.

    Returns:
        The formatted timestamp string for midnight on the first of that month.
    """
    if today is None:
        today = datetime.date.today()
    # Count months from year 0 so that stepping back across a year boundary
    # is plain integer arithmetic.
    month_number = today.year * 12 + (today.month - 1) - months_back
    first_day = datetime.date(month_number // 12, month_number % 12 + 1, 1)
    return first_day.strftime("%Y-%m-%d 00:00:00")


# The MongoDB window must match the SQL window used for the offline data
# (the two previous calendar months). The earlier "today - 57 days" shortcut
# fell into the wrong month when run late in a month (e.g. Apr 27 -> Mar 1).
vlm_start_date = monthStart(2)
vlm_end_date = monthStart(0)

# Channel-list query: distinct applied_from / applied_channel pairs for
# transacted loans above 20000 whose loan_start_date falls in the previous
# calendar month. Three channel ids are excluded (the original note calls
# them "recalling" channels).
sql_channel = '''
SELECT DISTINCT(applied_from),applied_channel FROM risk_analysis
WHERE transacted = 1
AND real_loan_amount > 20000
AND loan_start_date >= DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -1 MONTH),'%Y-%m-01') 
AND loan_start_date < DATE_FORMAT(NOW(),'%Y-%m-01')
and applied_from not in (159481,159486,159528)
'''

# Application-level query: one row per application (applied = 1) made in the
# two previous calendar months. '@applied_channel' is a placeholder that is
# replaced further down the file with the ids returned by sql_channel.
sql = '''
SELECT date_format(applied_at,'%Y-%m-%d') as applied_at,applied_from,applied_type,order_no FROM risk_analysis
WHERE DATE_FORMAT(applied_at,'%Y-%m') BETWEEN DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -2 MONTH),'%Y-%m')
AND DATE_FORMAT(DATE_ADD(NOW(),INTERVAL -1 MONTH),'%Y-%m')
AND applied_from in (@applied_channel)
AND applied = 1
'''

# Output directories for the charts: series that breach a 3-sigma band are
# saved under path_alarm, all others under path.
path_alarm = "../plot/VLM/alarm/"
path = "../plot/VLM/"

def querymongo(limit,query):
    """Run a ``find`` on the feature collection and return the rows as a DataFrame.

    Args:
        limit: Filter document written as a Python-literal dict string.
        query: Projection document written as a Python-literal dict string.

    Returns:
        A ``pandas.DataFrame`` with one row per matching document (empty when
        nothing matches).

    Raises:
        ValueError, SyntaxError: If ``limit`` or ``query`` is not a valid
            Python literal.
    """
    import ast  # local import: `ast` is not among this file's top-level imports

    # The strings are assembled from spreadsheet content, so they are parsed
    # with literal_eval, which accepts literals only, rather than eval().
    filter_doc = ast.literal_eval(limit)
    projection = ast.literal_eval(query)

    # NOTE(review): the connection string embeds credentials; consider moving
    # it to configuration.
    myclient = pymongo.MongoClient("mongodb://rc_dp_feature_user:qgrcdpfeature_2019@172.20.1.150:20000/?authSource=rc_dp_feature_pro")
    try:
        mycol = myclient["rc_dp_feature_pro"]["rc_feature_analysis_timing_v2"]
        # find() returns a lazy cursor: drain it while the client is still
        # open, otherwise the rows are fetched after close().
        rows = list(mycol.find(filter_doc, projection))
    finally:
        myclient.close()
    return pd.DataFrame(rows)

def connect2DB(db_config):
    """Open a new PyMySQL connection from a settings mapping.

    Args:
        db_config: Mapping providing the keys ``host``, ``port``, ``user``,
            ``password``, ``database`` and ``encoding``.

    Returns:
        The connection object returned by ``pymysql.connect``; closing it is
        the caller's responsibility.
    """
    # (pymysql.connect keyword, db_config key), in the order they are read.
    keyword_to_setting = (
        ('host', 'host'),
        ('port', 'port'),
        ('user', 'user'),
        ('passwd', 'password'),
        ('db', 'database'),
        ('charset', 'encoding'),
    )
    connect_kwargs = {keyword: db_config[setting] for keyword, setting in keyword_to_setting}
    return pymysql.connect(**connect_kwargs)

def query_sql(sql,db=risk_analysis_config):
    """Execute ``sql`` and return the result set as a DataFrame.

    Args:
        sql: SQL text to execute.
        db: Connection settings mapping passed to ``connect2DB``; defaults to
            ``risk_analysis_config``.

    Returns:
        A ``pandas.DataFrame`` on success, or the integer ``0`` when
        connecting or querying fails (the failure is printed).
    """
    conn = None
    try:
        conn = connect2DB(db)
        return pd.read_sql(sql,conn)
    except Exception as e:
        # The historical "return 0" contract is kept for callers, but the
        # cause is reported instead of being dropped silently.
        print('SQL Exception : ',e)
        return 0
    finally:
        # Close on every path so a failed query does not leak the connection.
        if conn is not None:
            try:
                conn.close()
            except Exception as e:
                print('SQL close Exception : ',e)
    
 # VLM with one variable
def _drawReferenceLine(axs, x, level, label, color='lightcoral'):
    """Draw a dashed horizontal line at ``level`` and label its right-hand end."""
    axs.add_line(Line2D((x[0], x[-1]), (level, level), linestyle='--', color=color))
    # The label is passed positionally: matplotlib renamed this parameter
    # from ``s`` to ``text`` and later dropped the ``s`` keyword.
    plt.annotate(label, xy=(x[-1] + 0.1, level))


def plotLine(title,y,row,col,table,save_path,upperBoundary=0,bottomBoundary=0):
    """Plot one series with mean and boundary lines plus a data table, and save it.

    The chart is written to ``save_path + title + ".png"`` and then shown.
    Nothing is drawn when there are 10 or fewer points, or when the integer
    part of the series total is 10 or less.

    Args:
        title: Chart title, also used as the file name stem.
        y: pandas Series of values to plot, one per entry of ``col``.
        row: Row labels for the table under the chart.
        col: Index of x labels; the first five characters of each label are
            dropped for display (presumably a 'YYYY-' prefix of date strings).
        table: Cell contents for the table, one sequence per entry of ``row``.
        save_path: Directory prefix, expected to end with a path separator.
        upperBoundary: Upper reference level; ``0`` means mean + 3 * std.
        bottomBoundary: Lower reference level; ``0`` means mean - 3 * std.

    Returns:
        Always ``1``.
    """
    # `or` (not `|`): the bitwise operator binds tighter than `<=`, which
    # turned the old test into a chained comparison against `10 | sum`.
    if len(col) <= 10 or int(y.sum()) <= 10:
        return 1

    cols = [item[5:] for item in col.values.tolist()]
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['savefig.dpi'] = 226  # resolution of the saved image
    fig,axs = plt.subplots(1,1,figsize=(33,11),linewidth=0.1)
    x = range(len(col))
    mean = y.mean()
    std = y.std()

    axs.plot(x,y)
    _drawReferenceLine(axs, x, mean, '月均{}'.format(round(mean,2)), color='darkorange')

    # A boundary argument of 0 means "derive it from the series".
    upper = mean + 3 * std if upperBoundary == 0 else upperBoundary
    # The explicit lower boundary previously drew upperBoundary by mistake.
    lower = mean - 3 * std if bottomBoundary == 0 else bottomBoundary
    _drawReferenceLine(axs, x, upper, 'Mean+3STD\n{}'.format(round(upper,2)))
    _drawReferenceLine(axs, x, lower, 'Mean-3STD\n{}'.format(round(lower,2)))

    # Vertical guide from each point down to a baseline: 0, unless the whole
    # series sits well above zero.
    bottom = 0
    if y.min() - std * 3 - mean * 0.02 > 0:
        bottom = y.min() - std * 3 - std * 0.1
    plt.vlines(x,[bottom],y,color = 'lightgrey',linestyle = '--')
    axs.grid()
    # The table's column headers act as the x axis labels.
    plt.xticks([])

    the_table = plt.table(cellText=table,
                          rowLabels=row,
                          colLabels=cols,
                          colWidths=[0.91 / (len(col) - 1)]*len(col),
                          loc='bottom')
    the_table.auto_set_font_size(False)
    the_table.set_fontsize(9)
    fig.subplots_adjust(left=0.032,right=0.97)
    fig.set_size_inches(33,11)

    plt.title(title,fontsize=18)
    plt.savefig(save_path + title + ".png")
    plt.show()
    # Release the figure; this function is called once per chart in a long
    # loop and open figures would otherwise accumulate.
    plt.close(fig)
    return 1
    
############################################    
def dataManipul(df,keyword):
    """Summarise one feature per application day.

    Args:
        df: DataFrame holding an ``applied_at`` column and the ``keyword``
            column (callers pass exactly these two columns).
        keyword: Name of the feature column.

    Returns:
        A 4-tuple ``(zero_rate, missing_rate, cols, df_sum)``:

        * ``zero_rate`` - per day, zeros as a percentage of non-null values,
          rounded to one decimal.
        * ``missing_rate`` - per day, nulls as a percentage of all rows,
          rounded to one decimal.
        * ``cols`` - the index of days that occur in ``df``.
        * ``df_sum`` - per-day ``mean``, ``std`` and ``count`` of the non-null
          values left after sorting all values in descending order and
          keeping positions ``int(n*0.01)`` to ``int(n*0.99)``. The trim is
          applied to the whole frame, not per day; days with nothing left
          are filled with 0.
    """
    # Non-null observations per day; its index lists every day present.
    non_null_count = df[['applied_at',keyword]].groupby('applied_at').count()[keyword]
    day_index = non_null_count.index
    # All-zero series over every day, used to re-introduce days that a
    # filtered group-by drops.
    zero_base = pd.Series(np.zeros(non_null_count.shape),index = day_index)

    def on_every_day(per_day_counts):
        # Align with zero_base so days absent from the counts come back as 0.
        aligned = pd.concat([zero_base,per_day_counts], axis = 1, sort = True)
        return aligned.fillna(0)[keyword]

    # Nulls are filled with 0 first so that count() can see those rows.
    null_rows = df[df[keyword].isnull()].fillna(0)
    missing_count = on_every_day(null_rows.groupby('applied_at')[keyword].count())
    missing_rate = missing_count / (non_null_count + missing_count) * 100

    zero_rows = df[df[keyword] == 0]
    zero_count = on_every_day(zero_rows.groupby('applied_at')[keyword].count())
    zero_rate = zero_count / non_null_count * 100

    # Rank the non-null rows from largest to smallest and cut both tails.
    ranked = df.dropna(axis = 0).sort_values(by=keyword,ascending=False)
    ranked = ranked.reset_index().drop('index',axis=1)
    total_rows = len(ranked)
    trimmed = ranked.iloc[int(total_rows*0.01):int(total_rows*0.99)]

    stats = trimmed.groupby('applied_at').agg(['mean','std','count'])
    # zero_base contributes the helper column 0, which is dropped again once
    # every day is present.
    stats = pd.concat([zero_base,stats], axis = 1, sort = True).fillna(0).drop(columns=[0])
    stats.columns = ['mean','std','count']

    return zero_rate.fillna(0).round(1),missing_rate.fillna(0).round(1),day_index,stats

#############################################    
# check via channel details
def separateBychannel(df,key,meansub3std,meanpls3std):
    """Re-check an alarming feature per channel and plot the channels that breach.

    For every channel id in ``kalist`` the per-day summary of ``key`` is
    recomputed on that channel's rows. A chart is saved under
    ``./plot/vlm_separate_By_channel/`` when the last 30 daily means leave
    the ``[meansub3std, meanpls3std]`` band.

    The chart title and the error message read the module globals ``model``,
    ``description``, ``fea_i``, ``appliedType_type``, ``appliedType_index``
    and ``appliedType``, so this must be called from inside the driver loop
    that binds them.

    Args:
        df: DataFrame with ``applied_at``, ``applied_from`` and ``key`` columns.
        key: Feature column name.
        meansub3std: Lower alarm threshold.
        meanpls3std: Upper alarm threshold.

    A ``ValueError`` for one channel skips that channel. Any other exception
    is printed and ends the channel loop.
    """
    row_labels = ['value','count','Missing Rate','Zero Rate']
    try:
        for channel in kalist:
            try:
                channel_rows = df[df.applied_from == channel][['applied_at',key]]
                zero_rate, missing_rate, days, stats = dataManipul(channel_rows,key)
                daily_mean = stats['mean']
                cells = [
                    stats['mean'].round(1),
                    stats['count'].astype('int'),
                    missing_rate.astype('str')+'%',
                    zero_rate.astype('str')+'%',
                ]

                recent = daily_mean.iloc[-30:]
                if (recent.max() >  meanpls3std) | (recent.min() <  meansub3std):
                    chart_title = (str(model) + '-' + description[fea_i]+'-Mean-'
                                   +appliedType_type[appliedType_index]+'with'+str(channel)+'-VLM')
                    plotLine(chart_title,daily_mean,row_labels,days,cells,'./plot/vlm_separate_By_channel/')
            except ValueError:
                # This channel cannot be summarised or plotted; try the next one.
                continue
    except Exception as e :
        print('channel Exception : ',key,appliedType,e)

        
########### extract channel list #############        
         
# Channel ids returned by the channel-list query.
_channel_frame = query_sql(sql_channel)
applied_channel = _channel_frame.applied_from.tolist()
# str(list) without its surrounding brackets is the comma-separated id list
# that fills the IN (...) placeholder of the application query.
_channel_id_text = str(applied_channel).strip('[').strip(']')
sql = sql.replace('@applied_channel',_channel_id_text)

#########################################################################

#########################################################################
# NOTE: these loops deliberately stay at module level. separateBychannel()
# reads `model`, `description`, `fea_i`, `appliedType_type`,
# `appliedType_index` and `appliedType` as module globals, so each of those
# names must keep being bound here under exactly these names.

# Applied-type filters and the matching labels used in chart titles. The
# first entry ('1,2,3') stands for the whole population and is not filtered.
appliedTypeList = ['1,2,3','1','2','3']
appliedType_type = ['客群总体','首申','复申','复贷']
row_labels = ['value','count','Missing Rate','Zero Rate']

# The application rows depend on neither the model nor the feature, so they
# are fetched once up front instead of once per feature.
df_offline = query_sql(sql,risk_analysis_config)
mongo_filter = limit.replace('@start_date',vlm_start_date).replace('@end_date',vlm_end_date)

for model in modelList:
    #feature key list
    features = dict_DD[model].feature
    # query key list
    queries = dict_DD[model].queries
    #feature descriptions list
    description = dict_DD[model].description

    for fea_i, raw_key in enumerate(queries):
        key = raw_key.strip()

        df = querymongo(mongo_filter,query.replace('@key',key))
        # Without these columns the merge below cannot work; skip the feature
        # with a message rather than aborting the whole report.
        if 'order_id' not in df.columns or key not in df.columns:
            print('Mongo Exception : ',key,'no usable data returned')
            continue
        # Empty strings are treated as missing values.
        df = df.replace('',np.nan)
        # Right join: keep every application, with NaN where no feature value exists.
        df = pd.merge(df,df_offline,how='right',left_on='order_id',right_on='order_no')[['applied_at','applied_from','applied_type',key]]

        df[key] = df[key].astype('float')
        df['applied_type'] = df['applied_type'].astype('int')

        for appliedType_index, appliedType in enumerate(appliedTypeList):
            if appliedType_index == 0:
                df_tmp = df[['applied_at','applied_from',key]]
            else:
                df_tmp = df[df.applied_type == int(appliedType)][['applied_at','applied_from',key]]

            # ---- daily mean -------------------------------------------------
            try:
                # One summary feeds both the Mean and the Std chart.
                zero_rate_total,missing_rate_total,cols_total,df_sum = dataManipul(df_tmp[['applied_at',key]],key)
                title_head = model+'-'+description[fea_i]
                type_label = appliedType_type[appliedType_index]
                count_row = df_sum['count'].astype('int')
                missing_row = missing_rate_total.astype('str')+'%'
                zero_row = zero_rate_total.astype('str')+'%'

                y_total = df_sum['mean']
                table = [y_total.round(1),count_row,missing_row,zero_row]

                meanpls3std = y_total.mean() + y_total.std() * 3
                meansub3std = y_total.mean() - y_total.std() * 3

                title = title_head+'-Mean-'+type_label+'-变化VLM'
                recent = y_total.iloc[-30:]
                if (recent.max() >  meanpls3std) | (recent.min() <  meansub3std):
                    plotLine(title,y_total,row_labels,cols_total,table,path_alarm)
                    separateBychannel(df_tmp,key,meansub3std,meanpls3std)
                else:
                    plotLine(title,y_total,row_labels,cols_total,table,path)
            except Exception as e:
                # No Std chart either when the Mean chart could not be produced.
                print('Mean Exception : ',key,appliedType,e)
                continue

            # ---- daily standard deviation -----------------------------------
            try:
                y_total = df_sum['std']
                # The count row is shown as int, matching the Mean table.
                table = [y_total.round(1),count_row,missing_row,zero_row]

                stdpls3std = y_total.mean() + y_total.std() * 3
                stdsub3std = y_total.mean() - y_total.std() * 3

                title = title_head+'-Std-'+type_label+'-变化VLM'
                # NOTE(review): this window excludes the most recent day
                # ([-30:-1]) while the Mean check uses [-30:]; kept as found,
                # confirm which one is intended.
                recent = y_total.iloc[-30:-1]
                if (recent.max() >  stdpls3std) | (recent.min() <  stdsub3std):
                    plotLine(title,y_total,row_labels,cols_total,table,path_alarm)
                else:
                    plotLine(title,y_total,row_labels,cols_total,table,path)
            except Exception as e:
                print('Std Exception : ',e)





