# -*- encoding: utf8 -*-
"""
描述：
    现金贷应还贷前服务费（A）和实还贷前服务费（B）以及减免贷前服务费（G1）
    现金贷应还贷后服务费（C）和实还贷后服务费（D）以及减免贷后服务费（G2）

    E = max(C - D - G2, 0)
    F = min(C - D - G2, 0)

    每条明细都计算一个E和F列，然后对明细进行按月累积汇总，最后的结果是一个累积的值

最终表：
    1. 按月不按ABC汇总的EF列
    2. 按月按ABC汇总的EF列

注意：
    1. 查询放款订单只查成功订单（loan_manifest.is_active = 1）
    2. 新马上和非新马上分开处理

Author:
  Ma Zhen(zhen.ma@quantgroup.cn)
"""

import pandas as pd
from sqlalchemy import create_engine
import numpy as np
# Display floats with five decimal places when frames/series are printed.
pd.set_option('display.float_format', lambda x: '%.5f' % x)
# Disable pandas' chained-assignment warning for the whole script.
pd.options.mode.chained_assignment = None

# Directory that every intermediate CSV and final Excel file is read from / written to.
path = u'E:/内控/'

# NOTE(review): the database user, password and host are hard-coded in this URL;
# consider loading them from configuration kept outside the source tree.
# echo=True is passed so that SQLAlchemy logs the statements it executes.
engine_audit = create_engine('mysql+mysqldb://yulong:ohVU1nPITynSZwR2@172.16.3.201:3306/temp?charset=utf8', echo=True)

# Repayment-plan rows joined to their loan (loan_manifest) and loan class.
# TODO: the selected fields could be trimmed - drop unused ones such as plan_id,
# keep year and month together, etc.
# Also, class can be joined in at read time (as done here) so that it does not
# have to be looked up separately afterwards.
# The '%s' in the loan_paid_at filter is a placeholder for the cut-off timestamp
# and has to be substituted before the query is executed.
# NOTE(review): the module docstring says only successful loans
# (loan_manifest.is_active = 1) should be queried; this query selects is_active
# but does not filter on it - confirm the filtering happens somewhere.
sql_loan_plan = """
SELECT
  t3.class,
  date_format(t2.loan_paid_at, '%%Y-%%m') loan_month,
  t2.loan_paid_at,
  t2.ref_id,
  t2.loan_id,
  t2.service_fee1                       total_req_service_fee1,
  t2.service_fee1 / t2.contract_term    loan_req_service_fee1,
  t2.is_active,
  t2.funding_code,
  date_format(t1.deadline, '%%Y-%%m')     plan_month,
  t1.deadline,
  t1.term_no,
  t1.id                                 plan_id,
  t1.service_fee1                       plan_req_service_fee1,
  t1.service_fee2                       plan_req_service_fee2
FROM
  temp.user_repayment_plan t1
  JOIN temp.loan_manifest t2
    ON t1.ref_id = t2.ref_id
       AND t2.loan_paid_at < '%s'
  JOIN temp.loan_class t3 ON t3.ref_id = t2.ref_id;
"""
# Actual repayments (service_fee1, service_fee2 and the mitigated amount) per
# repayment plan, restricted to plans whose fund_code is not 160 and to
# repayments made before 2017-01-01.
sql_ref = """
SELECT
  date_format(t2.repaid_at, '%%Y-%%m') repay_month,
  t2.repaid_at,
  t1.plan_id,
  t1.service_fee1                    real_service_fee1,
  t1.service_fee2                    real_service_fee2,
  t1.mitigate_service_fee
FROM
  temp.xjd_repay_plan_repay_record_ref t1
  JOIN temp.xjd_repay_plan_repay_record_ref_repay_date t2
    ON t1.id = t2.xjd_ref_id AND t2.repaid_at < '2017-01-01 00:00:00'
  JOIN temp.user_repayment_plan t3 ON t3.id = t1.plan_id AND t3.fund_code <> 160;
"""
# Actual repayments for fund_code = 160 plans, taken from
# new_ms_transaction_with_term_copy and matched to repayment plans on
# real_loan_id and term_no; repayments before 2017-01-01 only.
# mitigate_service_fee is selected as a constant 0 for these rows.
sql_ms = """
SELECT
  date_format(t1.repaid_at, '%%Y-%%m') repay_month,
  t1.repaid_at,
  t2.id                              plan_id,
  t1.service_fee1 real_service_fee1,
  t1.service_fee2 real_service_fee2,
  0                                  mitigate_service_fee
FROM temp.new_ms_transaction_with_term_copy t1
  JOIN temp.user_repayment_plan t2 ON t2.real_loan_id = t1.loan_id
                                      AND t2.term_no = t1.term_no AND t2.fund_code = 160 AND t1.repaid_at < '2017-01-01 00:00:00';
"""

# Chunk size (rows per chunk) passed to the pd.read_sql calls in this file.
step = 10000


# Fetch the full 2015-2016 detail of post-loan service fees due.
def get_plan(cutoff='2017-01-01 00:00:00'):
    """Download the repayment-plan detail and cache it as ``plan_2015_2016.csv``.

    Runs ``sql_loan_plan`` against ``engine_audit`` in chunks of ``step`` rows
    and writes the combined result under ``path``.

    Args:
        cutoff: exclusive upper bound for ``loan_paid_at``, substituted into
            the query's ``'%s'`` placeholder. The default is the same
            timestamp that ``sql_ref`` and ``sql_ms`` hard-code for
            ``repaid_at``.
    """
    # Only the '%s' placeholder is replaced. The doubled '%%' inside the
    # date_format() calls is deliberately left as is, because sql_ref and
    # sql_ms carry the same escaping and are handed to the driver unformatted.
    # NOTE(review): confirm the driver collapses '%%' to '%' on execution.
    query = sql_loan_plan.replace('%s', cutoff)
    chunks = list(pd.read_sql(query, con=engine_audit, chunksize=step))
    # One concat at the end instead of DataFrame.append per chunk, which
    # re-copies every previously accumulated row on each iteration.
    df_plan = pd.concat(chunks) if chunks else pd.DataFrame()
    df_plan.to_csv(path + 'plan_2015_2016.csv', index=None, encoding='utf-8')


# 获取除新马上之外的其他所有的实还贷后服务费
def get_ref():
    df_plan = pd.read_csv(path + 'plan_2015_2016.csv', encoding='utf-8')
    no_ms_ref_id = df_plan.ix[df_plan.fund_code != 160, 'ref_id'].tolist()
    # non_ms_plan_id = df_plan.ix[df_plan.fund_code != 160, 'plan_id'].tolist()
    df = pd.read_sql(sql_ref, con=engine_audit, chunksize=step)
    df_ref = pd.DataFrame()
    for tmp in df:
        # 注意看是否有成功滤掉，没有滤掉考虑类型是否一致
        # print '滤掉之前长度', len(tmp)
        # tmp = tmp.ix[tmp.plan_id.isin(non_ms_plan_id)]
        # print '滤掉之后长度', len(tmp)
        df_ref = df_ref.append(tmp)
    print 'before', df_ref[[u'实还贷后服务费', u'服务费减免金额']].sum()
    df_ref.to_csv(path + 'all_ref_2015_2016.csv', index=None, encoding='utf-8')
    print df_ref.ix[~df_ref.ref_id.isin(no_ms_ref_id), 'ref_id']
    df_ref = df_ref.ix[df_ref.ref_id.isin(no_ms_ref_id)]
    print 'after', df_ref[[u'实还贷后服务费', u'服务费减免金额']].sum()
    df_ref.fillna(0, inplace=True)
    df_ref.to_csv(path + 'ref_2015_2016.csv', index=None, encoding='utf-8')


# Fetch the post-loan service fees actually repaid for New Mashang (fund_code 160).
def get_new_ms():
    """Download the fund_code = 160 repayments and cache them as ``new_ms_2015_2016.csv``.

    Runs ``sql_ms`` against ``engine_audit`` in chunks of ``step`` rows and
    writes the combined result under ``path``.
    """
    # The query is defined in this file as sql_ms; the previously referenced
    # name sql_new_ms does not exist and raised NameError.
    chunks = list(pd.read_sql(sql_ms, con=engine_audit, chunksize=step))
    # One concat at the end instead of DataFrame.append per chunk, which
    # re-copies every previously accumulated row on each iteration.
    df_new_ms = pd.concat(chunks) if chunks else pd.DataFrame()
    df_new_ms.to_csv(path + 'new_ms_2015_2016.csv', index=None, encoding='utf-8')


# Process the downloaded data.
def process_data():
    """Merge fees due with fees repaid and write per-loan, per-month pivot CSVs.

    Reads ``plan_2015_2016.csv``, ``all_ref_2015_2016.csv`` (the unfiltered
    repayment file, not ``ref_2015_2016.csv``) and ``new_ms_2015_2016.csv``
    from ``path``. Plans with ``fund_code`` 160 (New Mashang) are handled
    separately from all others. For each group the fee-due and fee-repaid
    columns are pivoted into tables with one row per ``ref_id`` and one column
    per year-month key, and written as ``<prefix>req.csv`` / ``<prefix>real.csv``
    with the prefixes ``new_ms_pivot_`` and ``non_new_ms_pivot_``.

    Raises:
        ValueError: if a (ref_id, year-month) pair occurs more than once in
            either group, since the pivot needs that pair to be unique.

    NOTE(review): the Chinese column names and ``fund_code`` used here are not
    among the aliases selected by the SQL constants in this file; confirm the
    CSV schema this function expects.
    """
    df_plan = pd.read_csv(path + 'plan_2015_2016.csv', encoding='utf8')
    df_ref = pd.read_csv(path + 'all_ref_2015_2016.csv', encoding='utf8')
    df_ms = pd.read_csv(path + 'new_ms_2015_2016.csv', encoding='utf8')
    # New Mashang and non-New-Mashang plans have to be processed separately.
    df_new_ms = df_plan.ix[df_plan.fund_code == 160]
    df_others = df_plan.ix[df_plan.fund_code != 160]

    # Control totals: fee due (all / New Mashang / others), then fee repaid and waived.
    print '应收贷后', df_plan[u'应收贷后服务费'].sum(), df_new_ms[u'应收贷后服务费'].sum(), df_others[u'应收贷后服务费'].sum()
    print '实收贷后', df_ref[u'实还贷后服务费'].sum(), df_ref[u'服务费减免金额'].sum(), df_ms[u'实还贷后服务费'].sum()
    # Merge on year, month and ref_id (or loan_id); the term number is no longer meaningful here.
    # df_ref = df_ref.ix[df_ref.ref_id.isin(df_others.ref_id)]
    print df_ref[[u'实还贷后服务费', u'服务费减免金额']].sum()
    # Outer merge keeps months that have only a due amount or only a repayment.
    df1 = pd.merge(df_others, df_ref, how='outer', on=[u'年', u'月', 'ref_id'])
    df1.fillna(0, inplace=True)
    df1 = df1[[u'年', u'月', 'ref_id', u'应收贷后服务费', u'实还贷后服务费', u'服务费减免金额']]
    # The waived amount is added to the repaid amount, so both reduce what is still owed.
    df1[u'实还贷后服务费'] = df1[u'实还贷后服务费'] + df1[u'服务费减免金额']
    # df1.to_csv(path + 'non_new_ms.csv', index=None, encoding='utf8')

    # New Mashang is keyed by loan_id.
    df2 = pd.merge(df_new_ms, df_ms, how='outer', on=[u'年', u'月', 'loan_id'])
    df2.fillna(0, inplace=True)
    # Unlike df1, the waived amount is not added to the repaid amount for df2.
    df2 = df2[[u'年', u'月', 'loan_id', u'应收贷后服务费', u'实还贷后服务费', u'服务费减免金额']]
    # Renamed for consistency, but the two groups must not be processed together:
    # ref_id and loan_id values overlap.
    df2.rename(columns={'loan_id': 'ref_id'}, inplace=True)
    # df2.to_csv(path + 'new_ms.csv', index=None, encoding='utf8')

    print df1[u'应收贷后服务费'].sum(), df2[u'应收贷后服务费'].sum()
    print df1[u'实还贷后服务费'].sum(), df2[u'实还贷后服务费'].sum()
    # Build the year-month key, formatted as YYYYMM.
    df1[u'年月'] = df1.apply(lambda x: '%d%02d' % (x[u'年'], x[u'月']), axis=1)
    df2[u'年月'] = df2.apply(lambda x: '%d%02d' % (x[u'年'], x[u'月']), axis=1)

    # Done with pivot tables; the index (ref_id here) must be unique per month first.
    # One New Mashang repayment plan is faulty and makes the pivot fail, so it is handled separately.

    df = df2
    fill_name = path + 'new_ms_pivot_'
    df = df.drop_duplicates()
    # Hard-coded correction: fee due for ref_id 20109145 in 201610 is set to 48
    # (presumably the faulty plan mentioned above - confirm).
    df.ix[(df['ref_id'] == 20109145) & (df[u'年月'] == '201610'), u'应收贷后服务费'] = 48
    if len(df.ix[df[['ref_id', u'年月']].duplicated()]) > 0:
        # Print every row of the offending ref_ids before aborting.
        ref_id = df.ix[df[['ref_id', u'年月']].duplicated(), 'ref_id']
        print df.ix[df.ref_id.isin(ref_id)]
        raise ValueError('新马上数据有异常')

    # Rows: ref_id, columns: year-month, values: fee due / fee repaid; gaps become 0.
    df_req_fee = df.pivot(u'ref_id', u'年月', u'应收贷后服务费')
    df_req_fee.fillna(0, inplace=True)
    df_req_fee.to_csv(fill_name + 'req.csv', encoding='utf8')
    df_real_fee = df.pivot(u'ref_id', u'年月', u'实还贷后服务费')
    df_real_fee.fillna(0, inplace=True)
    df_real_fee.to_csv(fill_name + 'real.csv', encoding='utf8')

    # Same check and pivots for the non-New-Mashang group (no drop_duplicates here).
    df = df1
    fill_name = path + 'non_new_ms_pivot_'
    if len(df.ix[df[['ref_id', u'年月']].duplicated()]) > 0:
        ref_id = df.ix[df[['ref_id', u'年月']].duplicated(), 'ref_id']
        print df.ix[df.ref_id.isin(ref_id)]
        raise ValueError('非新马上数据有异常')
    df_req_fee = df.pivot(u'ref_id', u'年月', u'应收贷后服务费')
    df_req_fee.fillna(0, inplace=True)
    df_req_fee.to_csv(fill_name + 'req.csv', encoding='utf8')
    df_real_fee = df.pivot(u'ref_id', u'年月', u'实还贷后服务费')
    df_real_fee.fillna(0, inplace=True)
    df_real_fee.to_csv(fill_name + 'real.csv', encoding='utf8')


# 汇总数据，应该可以继续优化
def cal_fee():
    other_real = pd.read_csv(path + 'non_new_ms_pivot_real.csv', encoding='utf8')
    other_req = pd.read_csv(path + 'non_new_ms_pivot_req.csv', encoding='utf8')
    ms_real = pd.read_csv(path + 'new_ms_pivot_real.csv', encoding='utf8')
    ms_real.rename(columns={'ref_id': 'loan_id'}, inplace=True)
    ms_req = pd.read_csv(path + 'new_ms_pivot_req.csv', encoding='utf8')
    ms_req.rename(columns={'ref_id': 'loan_id'}, inplace=True)

    all_req = pd.concat([other_req, ms_req])
    all_req.fillna(0, inplace=True)

    all_real = pd.concat([other_real, ms_real])
    all_real.fillna(0, inplace=True)

    # all_req.to_csv(path + 'all_req.csv', index=None, encoding='utf8')
    # all_real.to_csv(path + 'all_real.csv', index=None, encoding='utf8')
    all_req.drop(['loan_id', 'ref_id'], axis=1, inplace=True)
    all_real.drop(['loan_id', 'ref_id'], axis=1, inplace=True)
    print all_req.sum(), all_real.sum()

    col = all_real.columns
    for i in xrange(len(col)):
        name = col[i]
        if i == 0:
            continue
        else:
            all_real[name] = all_real[col[i]] + all_real[col[i - 1]]
            all_req[name] = all_req[col[i]] + all_req[col[i - 1]]


    df_E = all_req - all_real
    df_F = all_req - all_real

    for name in col:
        df_E.ix[df_E[name] < 0, name] = 0
        df_F.ix[df_F[name] > 0, name] = 0
    E = df_E.sum().to_frame()
    E.columns = ['E']
    F = df_F.sum().to_frame()
    print df_E.sum(), df_F.sum()
    F.columns = ['F']
    df = pd.merge(E, F, left_index=True, right_index=True)
    df.to_excel(path + 'EF_no_ABC_1.xlsx')


# NOTE(review): not referenced anywhere in the visible part of this file, and
# the WHERE clause names loan_id without any comparison - looks unfinished.
sql_class = """SELECT * FROM loan_class WHERE loan_id"""


def calc_fee2():
    """Write the cumulative monthly E/F totals split by loan class to ``EF_ABC_1.xlsx``.

    Reads ``loan_class_all.csv`` and the four pivot CSVs from ``path``, attaches
    the ``class`` column to every pivot row (by ``ref_id`` for the
    non-New-Mashang pivots, by ``loan_id`` for the New Mashang ones), stacks
    them, accumulates the month columns into running totals, and computes
    ``due - repaid`` per row and month. E keeps the positive part, F the
    negative part; both are summed per class and month. The stacked inputs are
    also saved as ``all_req_ABC.csv`` / ``all_real_ABC.csv``.

    The output expects the class values 'A', 'B' and 'C' to be present, since
    those columns are accessed by name after the group-by.
    """
    df_class = pd.read_csv(path + 'loan_class_all.csv', encoding='utf8')
    # Normalise key dtypes so the merges below compare like with like.
    df_class.loan_id = df_class.loan_id.astype(str)
    df_class.ref_id = df_class.ref_id.astype(int)

    other_real = pd.read_csv(path + 'non_new_ms_pivot_real.csv', encoding='utf8')
    other_real.ref_id = other_real.ref_id.astype(int)
    # Row counts are printed around each merge (presumably to spot rows being
    # multiplied by duplicate keys in df_class - confirm).
    print 'other_real before', len(other_real)
    other_real = pd.merge(other_real, df_class, on='ref_id', how='left')
    print 'other_real after', len(other_real)

    other_req = pd.read_csv(path + 'non_new_ms_pivot_req.csv', encoding='utf8')
    other_req.ref_id = other_req.ref_id.astype(int)
    print 'other_req before', len(other_req)
    other_req = pd.merge(other_req, df_class, on='ref_id', how='left')
    print 'other_req after', len(other_req)

    # The New Mashang pivots store loan_id under the column name ref_id; rename
    # it back and join on loan_id as a string.
    ms_real = pd.read_csv(path + 'new_ms_pivot_real.csv', encoding='utf8')
    ms_real.rename(columns={'ref_id': 'loan_id'}, inplace=True)
    ms_real.loan_id = ms_real.loan_id.astype(str)
    print 'ms_real before', len(ms_real)
    ms_real = pd.merge(ms_real, df_class, on='loan_id', how='left')
    print 'ms_real after', len(ms_real)

    ms_req = pd.read_csv(path + 'new_ms_pivot_req.csv', encoding='utf8')
    ms_req.rename(columns={'ref_id': 'loan_id'}, inplace=True)
    ms_req.loan_id = ms_req.loan_id.astype(str)
    print 'ms_req before', len(ms_req)
    ms_req = pd.merge(ms_req, df_class, on='loan_id', how='left')
    print 'ms_req after', len(ms_req)

    all_req = pd.concat([other_req, ms_req])
    all_real = pd.concat([other_real, ms_real])
    all_req.to_csv(path + 'all_req_ABC.csv', index=None, encoding='utf8')
    all_real.to_csv(path + 'all_real_ABC.csv', index=None, encoding='utf8')
    # all_req = pd.read_csv(path + 'all_req_ABC.csv', encoding='utf8')
    # all_real = pd.read_csv(path + 'all_real_ABC.csv', encoding='utf8')
    all_req.drop(['loan_id', 'ref_id'], inplace=True, axis=1)
    # NOTE(review): fillna(0) also fills the class column, so rows left without
    # a class by the left merges end up in a group 0 that E_ALL / F_ALL below
    # do not include.
    all_req.fillna(0, inplace=True)
    all_real.drop(['loan_id', 'ref_id'], inplace=True, axis=1)
    all_real.fillna(0, inplace=True)
    # col holds every remaining column except class, i.e. the month columns.
    col = list(all_req.columns)
    col.remove('class')

    print '应还', all_req[col].sum().sum()
    print '实还', all_real[col].sum().sum()

    # Running total across the month columns: each column adds the
    # already-accumulated column before it.
    for i in xrange(len(col)):
        name = col[i]
        if i == 0:
            continue
        else:
            all_req[name] = all_req[col[i]] + all_req[col[i - 1]]
            all_real[name] = all_real[col[i]] + all_real[col[i - 1]]
    df_E = pd.DataFrame(index=all_req.index, columns=all_req.columns)
    df_F = pd.DataFrame(index=all_req.index, columns=all_req.columns)
    # Month columns get due - repaid; the class column is copied through unchanged.
    for name in all_req.columns:
        if name != 'class':
            df_E[name] = all_req[name] - all_real[name]
            df_F[name] = all_req[name] - all_real[name]
        else:
            df_E[name] = all_req[name]
            df_F[name] = all_req[name]
    # E = max(diff, 0), F = min(diff, 0)
    for name in col:
        df_E.ix[df_E[name] < 0, name] = 0
        df_F.ix[df_F[name] > 0, name] = 0
    # Sum per class, then transpose so months are rows and classes are columns.
    E = df_E.groupby(['class']).agg(sum).T
    F = df_F.groupby(['class']).agg(sum).T
    E['E_ALL'] = E['A'] + E['B'] + E['C']
    E.rename(columns={'A': 'E_A', 'B': 'E_B', 'C': 'E_C'}, inplace=True)
    F['F_ALL'] = F['A'] + F['B'] + F['C']
    F.rename(columns={'A': 'F_A', 'B': 'F_B', 'C': 'F_C'}, inplace=True)
    EF = pd.merge(E, F, left_index=True, right_index=True)
    EF.to_excel(path + 'EF_ABC_1.xlsx')


# Mind the order: later stages read the files that earlier stages write.
if __name__ == '__main__':
    # calc_fee2 (the per-class report) is currently disabled and not part of the run.
    for stage in (get_plan, get_ref, get_new_ms, process_data, cal_fee):
        stage()
