# -*- encoding: utf8 -*-

"""
截止未还服务费
出明细，带分类ABC

注意：
1. 放款表订单状态的筛选
2. 新马上和非新马上分开出

Author:
  Ma Zhen(zhen.ma@quantgroup.cn)
"""
import pandas as pd
from sqlalchemy import create_engine
import datetime
import numpy as np

# Disable pandas' chained-assignment (SettingWithCopy) warning for this script.
pd.options.mode.chained_assignment = None
# Print floats with five decimal places whenever frames/series are displayed.
pd.set_option('display.float_format', lambda x: '%.5f' % x)
# Directory holding the intermediate CSV files passed between the steps below.
path = u'E:/财务/tmp/'
# Directory receiving the final Excel deliverables.
path_out = u'E:/财务/'

# NOTE(review): the connection URL embeds a user name and password directly in
# the source file; consider loading it from configuration or the environment.
# echo=True makes SQLAlchemy log every statement it executes.
engine_audit = create_engine(
    'mysql+mysqldb://internal_r:ArbNgtvlJzZHXsEu@172.16.3.201:3306/test_8_2?charset=utf8',
    echo=True)

# Batch size: rows per read_sql chunk and ref_ids per IN (...) batch.
step = 10000
# Columns whose totals handle_non_ms / handle_ms print after building a summary.
columns = [u'应收贷前服务费', u'实还贷前服务费', u'应收贷后服务费', u'实还贷后服务费', u'F', u'no_repay_fee', u'贷前服务费减免金额', u'贷后服务费减免金额']

# Repayment-plan rows (user_repayment_plan) joined to their loan (loan_manifest,
# is_active IN (1, -2) and loan_paid_at before the %s cutoff), the loan class
# (loan_class) and the per-plan fee table, whose service_fee1 / service_fee2
# columns are exposed as 应收贷前服务费 / 应收贷后服务费.
# The single %s placeholder is filled by get_plan with the cutoff date.
sql_plan = """
SELECT 
  t2.ref_id,
  t2.loan_id,
  t1.id plan_id,
  t1.term_no,
  t1.fund_code,
  t4.service_fee1 应收贷前服务费,
  t4.service_fee2 应收贷后服务费,
  t1.deadline,
  t2.loan_paid_at,
  t3.class
FROM
  user_repayment_plan t1 
  JOIN loan_manifest t2 
    ON t1.ref_id = t2.ref_id 
AND t2.is_active IN (1, -2) AND t2.loan_paid_at < '%s'
JOIN loan_class t3 ON t3.ref_id = t2.ref_id
join test_mzh.user_repayment_plan_service_fee t4 on t4.plan_id = t1.id;
"""

# Repayment records per plan with their repayment date and the repaid / mitigated
# service-fee amounts. The query has no date filter and no placeholder;
# handle_non_ms applies the repaid_at cutoff after loading the dump.
sql_ref = """
SELECT t1.plan_id, t2.repaid_at, t3.service_fee1 实还贷前服务费, 
t3.service_fee2 实还贷后服务费, 
t3.mitigate_service_fee1 贷前服务费减免金额, t3.mitigate_service_fee2 贷后服务费减免金额
FROM xjd_repay_plan_repay_record_ref t1 JOIN xjd_repay_plan_repay_record_ref_repay_date t2 ON t2.xjd_ref_id = t1.id 
JOIN test_mzh.xjd_repay_plan_repay_record_ref_service_fee t3 ON t3.xjd_ref_id = t1.id
"""

# Repayments of fund-160 loans (loan_manifest.funding_code = 160, is_active IN
# (1, -2)) made before the %s cutoff, one row per transaction/term, with the
# mitigation columns fixed at 0. The single %s placeholder is filled by
# get_new_ms with the cutoff date.
#
# The fee-detail table used to be aliased t1 as well, clashing with the
# user_repayment_plan alias; MySQL rejects a statement that declares the same
# alias twice, so it is now aliased t4.
# NOTE(review): service_fee1 / service_fee2 are still selected from t2 exactly as
# before; sql_plan and sql_ref read them from the *_service_fee table, so confirm
# whether they should come from t4 here.
sql_new_ms = """
 SELECT 
  t3.loan_id, t2.term_no,
  t2.repaid_at,
  t2.service_fee1 实还贷前服务费,
  t2.service_fee2 实还贷后服务费,
   0 贷前服务费减免金额, 0 贷后服务费减免金额
FROM
  new_ms_transaction_with_term_details t2 
  JOIN loan_manifest t3 
    ON t3.funding_code = 160 
    AND t3.loan_id = t2.loan_id 
    AND t2.repaid_at < '%s' 
    AND t3.is_active IN (1, -2)
  JOIN user_repayment_plan t1 ON t1.ref_id = t3.ref_id AND t1.term_no = t2.term_no
  join test_mzh.new_ms_transaction_with_term_details_service_fee t4 on t4.ms_trans_id = t2.id;
"""

# Per-term unpaid fee rows from a service_fee_detail_<suffix> table; the %s
# placeholder is the table-name suffix (get_details passes watch_day.year - 1).
sql_detail = """
SELECT ref_id, loan_id, term_no, no_repay_fee FROM service_fee_detail_%s
"""

# Repayments for plans whose fund_code is not 160, with repaid_at in
# [2017-01-01, 2017-06-01) and a positive net service fee
# (service_fee - mitigate_service_fee). The %s placeholder receives a
# parenthesised list of ref_ids. `remark` concatenates the repayment date, the
# literal '还款服务费' and the rounded net amount.
sql_repay = """
SELECT 
  t3.ref_id,
  t3.term_no,
  t1.service_fee - t1.mitigate_service_fee service_fee_repay_2017,
  t2.repaid_at ,
  concat(date(t2.repaid_at), '还款服务费', round(t1.service_fee - t1.mitigate_service_fee, 2)) remark
FROM
  new_transaction.xjd_repay_plan_repay_record_ref t1 
  JOIN temp.xjd_repay_plan_repay_record_ref_repay_date t2 
    ON t2.xjd_ref_id = t1.id 
    AND t2.repaid_at >= '2017-01-01 00:00:00' 
     and t2.repaid_at < '2017-06-01 00:00:00'
  JOIN user_repayment_plan t3 
    ON t3.id = t1.plan_id AND t3.fund_code <> 160
    AND t1.service_fee - t1.mitigate_service_fee > 0
    AND t3.ref_id IN %s ;
"""

# Fund-160 counterpart of sql_repay: repayments with a positive qg_service_fee
# and repaid_at >= 2017-01-01, returned with the same output columns.
# NOTE(review): unlike sql_repay there is no upper bound on repaid_at and no
# ref_id filter here - confirm that this is intended.
sql_ms = """
SELECT 
  t2.ref_id,
  t1.term_no,
  t1.qg_service_fee service_fee_repay_2017,
  t1.repaid_at ,
  concat(date(t1.repaid_at), '还款服务费',  round(t1.qg_service_fee, 2)) remark
FROM
  temp.new_ms_transaction_with_term_details t1 
  JOIN user_repayment_plan t2 ON t2.real_loan_id = t1.loan_id  AND t1.term_no = t2.term_no
  AND t1.qg_service_fee > 0
  AND t2.fund_code = 160 AND t1.repaid_at >= '2017-01-01 00:00:00'  
"""


# 获取15-16年全部的应还贷后服务费明细
def get_plan(watch_day):
    """Dump the repayment-plan rows selected by ``sql_plan`` to a CSV file.

    The query is read in chunks of ``step`` rows and written to
    ``plan_<year>_<month>.csv`` under ``path``. Nothing is written when the
    query returns no rows.

    :param watch_day: cutoff ``datetime``; substituted into the query as the
        upper bound on ``loan_paid_at`` and used to name the output file.
    """
    chunks = list(pd.read_sql(sql_plan % (watch_day,), con=engine_audit, chunksize=step))
    # Concatenate once: appending chunk by chunk re-copies every accumulated
    # row on each iteration.
    df_plan = pd.concat(chunks) if chunks else pd.DataFrame()
    if len(df_plan) > 0:
        df_plan.to_csv(path + 'plan_%s_%s.csv' % (watch_day.year, watch_day.month), index=None, encoding='utf-8')


# 获取除新马上之外的其他所有的实还贷后服务费
def get_ref(watch_day):
    """Dump the repayment records selected by ``sql_ref`` to a CSV file.

    The query is read in chunks of ``step`` rows, missing values are replaced
    by 0 and the result is written to ``ref_<year>_<month>.csv`` under
    ``path``. The file is written even when the query returns no rows.

    :param watch_day: ``datetime`` used only to name the output file; the
        query itself has no date filter.
    """
    chunks = list(pd.read_sql(sql_ref, con=engine_audit, chunksize=step))
    # Concatenate once: appending chunk by chunk re-copies every accumulated
    # row on each iteration.
    df_ref = pd.concat(chunks) if chunks else pd.DataFrame()
    df_ref.fillna(0, inplace=True)
    df_ref.to_csv(path + 'ref_%s_%s.csv' % (watch_day.year, watch_day.month), index=None, encoding='utf-8')


# 获取新马上的实还贷后服务费
def get_new_ms(watch_day):
    """Dump the fund-160 repayments selected by ``sql_new_ms`` to a CSV file.

    The query is read in chunks of ``step`` rows and written to
    ``new_ms_<year>_<month>.csv`` under ``path``. Nothing is written when the
    query returns no rows.

    :param watch_day: cutoff ``datetime``; substituted into the query as the
        upper bound on ``repaid_at`` and used to name the output file.
    """
    chunks = list(pd.read_sql(sql_new_ms % (watch_day,), con=engine_audit, chunksize=step))
    # Concatenate once: appending chunk by chunk re-copies every accumulated
    # row on each iteration.
    df_new_ms = pd.concat(chunks) if chunks else pd.DataFrame()
    if len(df_new_ms) > 0:
        df_new_ms.to_csv(path + 'new_ms_%s_%s.csv' % (watch_day.year, watch_day.month), index=None, encoding='utf-8')


def get_details(watch_day):
    """Dump the previous year's ``service_fee_detail_<year>`` table to CSV.

    The table suffix is ``watch_day.year - 1``. Rows are read in chunks of
    ``step`` and written to ``df_details_<year>_<month>.csv`` under ``path``.
    Nothing is written when the table is empty.

    :param watch_day: ``datetime`` selecting the source table and naming the
        output file.
    """
    chunks = list(pd.read_sql(sql_detail % (watch_day.year - 1,), con=engine_audit, chunksize=step))
    # Concatenate once: appending chunk by chunk re-copies every accumulated
    # row on each iteration.
    df_details = pd.concat(chunks) if chunks else pd.DataFrame()
    if len(df_details) > 0:
        df_details.to_csv(path + 'df_details_%s_%s.csv' % (watch_day.year, watch_day.month), index=None, encoding='utf-8')


def handle_non_ms(watch_day):
    df_plan = pd.read_csv(path + 'plan_2017_1.csv', encoding='utf8')
    print len(df_plan)
    # print df_plan[[u'应收贷前服务费', u'应收贷后服务费']].sum()
    df_plan['deadline'] = pd.to_datetime(df_plan['deadline'])
    df_plan['loan_paid_at'] = pd.to_datetime(df_plan['loan_paid_at'])
    df_plan = df_plan.ix[df_plan.loan_paid_at < watch_day]
    print '应收贷前服务费', df_plan[u'应收贷前服务费'].sum()
    print '应收贷后服务费', df_plan.ix[df_plan.deadline < watch_day, u'应收贷后服务费'].sum()

    df_ref = pd.read_csv(path + 'ref_2017_1.csv', encoding='utf8')
    print '去掉新马上之前', len(df_ref)
    print df_ref[[u'实还贷前服务费', u'实还贷后服务费']].sum()
    df_ref['repaid_at'] = pd.to_datetime(df_ref['repaid_at'])
    df_no_ms = df_plan.ix[df_plan.fund_code != 160]
    df_ref_1 = df_ref.ix[df_ref.plan_id.isin(df_no_ms.plan_id)]
    print '去掉新马上之后', len(df_ref_1)
    print df_ref_1[[u'实还贷前服务费', u'实还贷后服务费']].sum()

    df_ref_2016 = df_ref_1.ix[df_ref_1.repaid_at < watch_day]
    print 'ref表长度：', len(df_ref_2016)
    print 'before merge'
    print df_ref_2016[[u'实还贷前服务费', u'实还贷后服务费']].sum()
    df_ref_2016 = df_ref_2016.groupby(['plan_id']).agg(
        {u'实还贷前服务费': 'sum', u'实还贷后服务费': 'sum', u'贷前服务费减免金额': 'sum', u'贷后服务费减免金额': 'sum'}).reset_index()
    df_plan_ref_2016 = pd.merge(df_no_ms, df_ref_2016, on='plan_id', how='left')
    df_plan_ref_2016.fillna(0, inplace=True)
    print 'after merge'
    print df_plan_ref_2016[[u'实还贷前服务费', u'实还贷后服务费']].sum()

    con1 = df_plan_ref_2016['deadline'] > watch_day
    df_plan_ref_2016[u'应收贷后服务费deadline'] = df_plan_ref_2016[u'应收贷后服务费']
    df_plan_ref_2016.ix[con1, u'应收贷后服务费deadline'] = 0

    df_plan_ref_2016['E'] = 0
    df_plan_ref_2016['F'] = 0
    filter_condition = df_plan_ref_2016[u'应收贷后服务费'] - df_plan_ref_2016[u'实还贷后服务费'] - df_plan_ref_2016[u'贷后服务费减免金额'] > 0
    df_plan_ref_2016.ix[filter_condition, 'E'] = df_plan_ref_2016.ix[filter_condition, u'应收贷后服务费deadline'] - \
                                                 df_plan_ref_2016.ix[filter_condition, u'实还贷后服务费'] - \
                                                 df_plan_ref_2016.ix[filter_condition, u'贷后服务费减免金额']
    df_plan_ref_2016.ix[~filter_condition, 'F'] = df_plan_ref_2016.ix[~filter_condition, u'应收贷后服务费deadline'] - \
                                                  df_plan_ref_2016.ix[~filter_condition, u'实还贷后服务费'] - \
                                                  df_plan_ref_2016.ix[~filter_condition, u'贷后服务费减免金额']

    df_plan_ref_2016['no_repay_fee'] = 0

    df_plan_ref_2016['no_repay_fee'] = df_plan_ref_2016[u'应收贷前服务费'] - df_plan_ref_2016[u'实还贷前服务费'] - df_plan_ref_2016[
        u'贷前服务费减免金额'] + \
                                       df_plan_ref_2016[u'应收贷后服务费deadline'] - df_plan_ref_2016[u'实还贷后服务费'] - \
                                       df_plan_ref_2016[u'贷后服务费减免金额'] - df_plan_ref_2016['F']
    print df_plan_ref_2016[[u'应收贷前服务费', u'应收贷后服务费deadline', u'实还贷前服务费', u'贷前服务费减免金额', u'实还贷后服务费', u'贷后服务费减免金额']].sum()
    df_plan_ref_2016.to_csv(path + 'summary_%s_%s.csv' % (watch_day.year, watch_day.month), encoding='utf8')
    print df_plan_ref_2016[columns].sum()


def handle_ms(watch_day):
    df_plan = pd.read_csv(path + 'plan_2017_1.csv', encoding='utf8')
    df_plan['deadline'] = pd.to_datetime(df_plan['deadline'])
    df_plan['loan_paid_at'] = pd.to_datetime(df_plan['loan_paid_at'])
    df_ms_plan = df_plan.ix[(df_plan.loan_paid_at < watch_day) & (df_plan.fund_code == 160)]
    print df_ms_plan[[u'应收贷前服务费', u'应收贷后服务费']].sum()

    df_ms_repay = pd.read_csv(path + 'new_ms_2017_1.csv', encoding='utf8')
    # print '去掉新马上之前', len(df_ref)
    print df_ms_repay[[u'实还贷前服务费', u'实还贷后服务费']].sum()
    df_ms_repay['repaid_at'] = pd.to_datetime(df_ms_repay['repaid_at'])
    df_ms_repay = df_ms_repay.groupby(['loan_id', 'term_no']).agg(
        {u'实还贷前服务费': 'sum', u'实还贷后服务费': 'sum', u'贷前服务费减免金额': 'sum', u'贷后服务费减免金额': 'sum'}).reset_index()
    print 'before merge'
    print df_ms_repay[[u'实还贷前服务费', u'实还贷后服务费']].sum()
    df_ms_plan_ref = pd.merge(df_ms_plan, df_ms_repay, on=['loan_id', 'term_no'], how='left')
    print len(df_ms_plan_ref), len(df_ms_plan), len(df_ms_repay)
    df_ms_plan_ref.fillna(0, inplace=True)
    print 'after merge'
    print df_ms_plan_ref[[u'实还贷前服务费', u'实还贷后服务费']].sum()

    con1 = df_ms_plan_ref['deadline'] > watch_day
    df_ms_plan_ref[u'应收贷后服务费deadline'] = df_ms_plan_ref[u'应收贷后服务费']
    df_ms_plan_ref.ix[con1, u'应收贷后服务费deadline'] = 0

    df_ms_plan_ref['E'] = 0
    df_ms_plan_ref['F'] = 0
    filter_condition = df_ms_plan_ref[u'应收贷后服务费deadline'] - df_ms_plan_ref[u'实还贷后服务费'] - df_ms_plan_ref[
        u'贷后服务费减免金额'] > 0
    df_ms_plan_ref.ix[filter_condition, 'E'] = df_ms_plan_ref.ix[filter_condition, u'应收贷后服务费deadline'] - \
                                               df_ms_plan_ref.ix[filter_condition, u'实还贷后服务费'] - df_ms_plan_ref.ix[
                                                   filter_condition, u'贷后服务费减免金额']

    df_ms_plan_ref.ix[~filter_condition, 'F'] = df_ms_plan_ref.ix[~filter_condition, u'应收贷后服务费deadline'] - \
                                                df_ms_plan_ref.ix[~filter_condition, u'实还贷后服务费'] - df_ms_plan_ref.ix[
                                                    ~filter_condition, u'贷后服务费减免金额']
    df_ms_plan_ref['no_repay_fee'] = 0
    df_ms_plan_ref['no_repay_fee'] = df_ms_plan_ref[u'应收贷前服务费'] - df_ms_plan_ref[u'实还贷前服务费'] - df_ms_plan_ref[
        u'贷前服务费减免金额'] + df_ms_plan_ref[u'应收贷后服务费deadline'] - df_ms_plan_ref[u'实还贷后服务费'] - df_ms_plan_ref[
                                         u'贷后服务费减免金额'] - df_ms_plan_ref['F']

    df_ms_plan_ref.to_csv(path + 'summary_ms_%s_%s.csv' % (watch_day.year, watch_day.month), encoding='utf8')
    print df_ms_plan_ref[columns].sum()


def last_process(watch_day):
    df = pd.read_csv(path + 'summary_%s_%s.csv' % (watch_day.year, watch_day.month), encoding='utf8', low_memory=False)

    df[[u'应收贷前服务费', u'应收贷后服务费', u'应收贷后服务费deadline', u'实还贷前服务费', u'实还贷后服务费', u'贷前服务费减免金额', u'贷后服务费减免金额', u'E', u'F',
        u'no_repay_fee']].sum()
    if watch_day > datetime.datetime(2016, 4, 1):
        df2 = pd.read_csv(path + 'summary_ms_%s_%s.csv' % (watch_day.year, watch_day.month), encoding='utf8',
                          low_memory=False)
        df = pd.concat([df, df2])
    df = df[
        [u'loan_id', u'ref_id', u'term_no', u'plan_id', u'应收贷前服务费', u'应收贷后服务费', u'应收贷后服务费deadline', u'deadline',
         u'实还贷前服务费', u'实还贷后服务费', u'贷前服务费减免金额', u'贷后服务费减免金额', u'E', u'F', u'no_repay_fee', u'class']]
    print df[
        [u'应收贷前服务费', u'应收贷后服务费', u'应收贷后服务费deadline', u'实还贷前服务费', u'实还贷后服务费', u'贷前服务费减免金额', u'贷后服务费减免金额', u'E', u'F',
         u'no_repay_fee']].sum()
    df.rename(columns={'no_repay_fee': u'未还服务费'}, inplace=True)
    df.to_csv(path + 'all_details_%s_%s.csv' % (watch_day.year, watch_day.month),
              index=None, encoding='utf8')
    df[u'未还服务费'] = np.round(df[u'未还服务费'], 6)
    loan_ids = df.ix[df[u'未还服务费'] < 0, 'loan_id']
    df = df.ix[(df[u'未还服务费'] <> 0) & (~df['loan_id'].isin(loan_ids))]
    df.to_csv(path + 'no_repay_fee_details_%s_%s.csv' % (watch_day.year, watch_day.month), index=None, encoding='utf8')
    df[['loan_id', 'term_no', u'未还服务费']].to_excel(
        path_out + u'截至%s-%s-%s未还的服务费明细.xlsx' % (watch_day.year, watch_day.month, watch_day.day), index=None)


def get_repay_after(watch_day):
    """Attach the service fee repaid after the cutoff to the unpaid-fee details.

    Reads ``no_repay_fee_details_<year>_<month>.csv`` from ``path``, queries the
    later repayments (``sql_repay`` in batches of ``step`` ref_ids, plus
    ``sql_ms``), sums them per (ref_id, term_no) and left-joins them onto the
    details. Rows without a later repayment get 0. The result is written to
    ``截至时间之后还款的服务费.xlsx`` under ``path_out``.

    The date ranges of the later repayments are fixed inside ``sql_repay`` and
    ``sql_ms``; they do not depend on ``watch_day``.

    :param watch_day: cutoff ``datetime`` selecting the input file and deciding
        which plans get their later repayment capped.
    """
    df_all = pd.read_csv(path + 'no_repay_fee_details_%s_%s.csv' % (watch_day.year, watch_day.month), encoding='utf8')
    df_all.deadline = pd.to_datetime(df_all.deadline)
    ref_ids = df_all.ref_id.drop_duplicates().astype(str).tolist()

    frames = []
    for start in range(0, len(ref_ids), step):
        # Quoted, comma-separated IN list, e.g. ('1', '2').
        in_list = '(%s)' % ', '.join("'%s'" % ref_id for ref_id in ref_ids[start:start + step])
        frames.append(pd.read_sql(sql=sql_repay % (in_list,), con=engine_audit))
    # Appending the fund-160 result last keeps the original row order and means
    # pd.concat never receives an empty list when there are no ref_ids.
    frames.append(pd.read_sql(sql=sql_ms, con=engine_audit))
    df_repay = pd.concat(frames)

    gp_repay = pd.pivot_table(df_repay, index=['ref_id', 'term_no'], values=['service_fee_repay_2017', 'remark'],
                              aggfunc={'service_fee_repay_2017': np.sum,
                                       'remark': lambda x: ';'.join(list(x))}).reset_index()
    df_all_2017 = pd.merge(df_all, gp_repay, on=['ref_id', 'term_no'], how='left')
    df_all_2017.fillna(0, inplace=True)
    # For plans not yet due at the cutoff, cap the later repayment at the
    # receivable pre-loan fee.
    cond = (df_all_2017.deadline >= watch_day) & (df_all_2017.service_fee_repay_2017 > 0)
    df_all_2017.loc[cond, 'service_fee_repay_2017'] = np.minimum(df_all_2017.loc[cond, 'service_fee_repay_2017'],
                                                                 df_all_2017.loc[cond, u'应收贷前服务费'])

    df_all_2017.rename(columns={'service_fee_repay_2017': u'之后还款服务费'}, inplace=True)

    df_all_2017[['loan_id', 'term_no', u'未还服务费', u'之后还款服务费']].to_excel(path_out + u'截至时间之后还款的服务费.xlsx', index=None)


# 注意顺序，输入输出的文件依赖
# todo: 有两个观测时间点：2016-1-1和2017-1-1，所以应该是出两份数据
# todo: 可以一次性把所有数据都读出来保存在文件里，用的时候进行筛选
if __name__ == '__main__':
    cutoff_2016 = datetime.datetime(2016, 1, 1)
    cutoff_2017 = datetime.datetime(2017, 1, 1)

    # Step 1: fetch the full extracts once, using the latest cutoff.
    for fetch in (get_plan, get_ref, get_new_ms):
        fetch(cutoff_2017)

    # Step 2: build the summaries for each observation date from the extracts.
    handle_non_ms(cutoff_2016)
    handle_non_ms(cutoff_2017)
    handle_ms(cutoff_2017)

    # Currently disabled steps, kept for reference:
    # last_process(datetime.datetime(2016, 1, 1))
    # last_process(datetime.datetime(2017, 1, 1))
    # insert_into_sql(datetime.datetime(2016, 1, 1))
    # insert_into_sql(datetime.datetime(2017, 1, 1))

    # NOTE(review): get_repay_after reads no_repay_fee_details_2017_1.csv, which
    # is written by last_process; with that step disabled the file must already
    # exist from an earlier run.
    get_repay_after(cutoff_2017)
