# coding: utf-8

# In[1]:


import sys

# 加入父级搜索路径，根据情况修改
sys.path.insert(0, '../..')

# In[2]:


import pandas as pd
import numpy as np
import datetime
import os
from api import db_connect

# Re-import the database connection helper so that edits made to it take
# effect without restarting the interpreter (Python 2 builtin `reload`).
reload(db_connect)


def _format_float(value):
    """Render a float with five decimal places for DataFrame display."""
    return '%.5f' % value


# Silence pandas' chained-assignment warning and fix the float display format.
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.float_format', _format_float)

# Engine used by every query below.
# NOTE(review): 'temp' is presumably the default schema and False a debug/echo
# flag of the project helper -- confirm against api.db_connect.
engine_audit = db_connect.sql_engine(db_connect.audit, 'temp', False).get_engine()

# In[3]:


# Every run writes into a directory named after today's date (YYYYMMDD),
# located under the current working directory.
today = datetime.datetime.now()
date_str = today.strftime('%Y%m%d')
# Directory the main program is started from.
base_path = os.getcwd()
# Output directory for this run.
file_path = os.path.join(base_path, date_str)
# Scratch directory for temporary files.
tmp_path = os.path.join(file_path, 'temp')
# Directory for the per-month detail workbooks.
detail_path = os.path.join(file_path, 'details')

# Create whichever of the three directories does not exist yet.
for _directory in (file_path, tmp_path, detail_path):
    if not os.path.exists(_directory):
        os.makedirs(_directory)

# ----
# # 汇总统计

# In[68]:


# Summary queries. Each one groups by calendar month and by loan class and
# covers the half-open window [2015-01-01, 2017-01-01).
# NOTE(review): '%' is written as '%%' inside the SQL -- presumably so the
# literal survives the DB driver's %-style parameter handling; confirm.

# Total service fee receivable, accrued risk reserve and pre-loan service fee
# receivable, bucketed by the month of loan_paid_at.
sql_loan = """select 
  date_format(t2.loan_paid_at, '%%Y-%%m') 年月,
  t1.class 类别,
  sum(t2.all_service_fee) 总应收服务费,
  sum(t1.bad_debt_fee) 计提风险金总额,
  sum(t1.service_fee1) 应收贷前服务费 
from
  test_mzh.loan_manifest_service_fee_0722 t1 
  join temp.loan_manifest t2 
    on t2.id = t1.lm_id 
    and t2.loan_paid_at >= '2015-01-01 00:00:00' 
    and t2.loan_paid_at < '2017-01-01 00:00:00' 
group by 1, 2 ;"""
# Post-loan service fee receivable, bucketed by the month of each
# installment's deadline and by class.
# NOTE(review): temp.loan_manifest (t3) is joined but none of its columns are
# selected; it only restricts/multiplies rows via ref_id -- confirm intended.
sql_plan = """
select 
  date_format(t2.deadline, '%%Y-%%m') 年月,
  t1.class 类别,
  sum(t1.service_fee2) 应收贷后服务费 
from
  test_mzh.user_repayment_plan_service_fee_0722 t1 
  join temp.user_repayment_plan t2 
    on t2.id = t1.plan_id 
    and t2.deadline >= '2015-01-01 00:00:00' 
    and t2.deadline < '2017-01-01 00:00:00' 
  join temp.loan_manifest t3 
    on t3.ref_id = t2.ref_id 
group by 1,2;
"""
# Actually repaid pre-loan / post-loan service fees and the waived amounts,
# bucketed by the month of repaid_at. Loans with funding_code = 160 (the
# "new MS" channel) are excluded here and handled by sql_ms.
sql_ref = """
select 
  date_format(t3.repaid_at, '%%Y-%%m') 年月,
  t5.class 类别,
  sum(t1.service_fee1) 实还贷前服务费,
  sum(t1.service_fee2) 实还贷后服务费,
  sum(t1.mitigate_service_fee1) 减免贷前服务费,
  sum(t1.mitigate_service_fee2) 减免贷后服务费 
from
  test_mzh.xjd_repay_plan_repay_record_ref_service_fee_0722 t1 
  join temp.xjd_repay_plan_repay_record_ref t2 
    on t2.id = t1.xjd_ref_id 
  join temp.xjd_repay_plan_repay_record_ref_repay_date t3 
    on t3.xjd_ref_id = t2.id 
    and t3.repaid_at >= '2015-01-01 00:00:00' 
    and t3.repaid_at < '2017-01-01 00:00:00' 
  join temp.loan_manifest t4 
    on t4.ref_id = t1.ref_id 
    and t4.funding_code <> 160 
  join temp.loan_class t5 
    on t5.ref_id = t1.ref_id 
group by 1, 2 ;
"""
# Actual repayments of the "new MS" channel (plans with fund_code = 160),
# same output columns as sql_ref.
sql_ms = """
select 
  date_format(t2.repaid_at, '%%Y-%%m') 年月,
  t4.class 类别,
  sum(t1.service_fee1) 实还贷前服务费,
  sum(t1.service_fee2) 实还贷后服务费,
  sum(t1.mitigate_service_fee1) 减免贷前服务费,
  sum(t1.mitigate_service_fee2) 减免贷后服务费 
from
  test_mzh.new_ms_transaction_with_term_details_service_fee_0722 t1 
  join test_8_2.new_ms_transaction_with_term_details t2 
    on t2.id = t1.ms_trans_id 
    and t2.repaid_at >= '2015-01-01 00:00:00' 
    and t2.repaid_at < '2017-01-01 00:00:00' 
  join temp.user_repayment_plan t3 
    on t3.id = t1.plan_id 
    and t3.fund_code = 160 
  join temp.loan_class t4 
    on t4.ref_id = t1.ref_id 
group by 1, 2 ;
"""

# In[69]:


# Run the four summary queries. All four frames are required by the merge
# that follows (df1 and df2 are merged directly, df3 and df4 are concatenated),
# so none of the reads may stay commented out when the file runs as a script.
df1 = pd.read_sql(sql=sql_loan, con=engine_audit)  # receivable totals by loan month
df2 = pd.read_sql(sql=sql_plan, con=engine_audit)  # post-loan receivable by deadline month
df3 = pd.read_sql(sql=sql_ref, con=engine_audit)   # repayments, funding_code <> 160
df4 = pd.read_sql(sql=sql_ms, con=engine_audit)    # repayments, fund_code = 160


# In[70]:


# Combine the two repayment sources, re-aggregate them per (month, class),
# then outer-join receivables and repayments so that a month present in only
# one of the inputs is still kept (missing amounts become 0).
_summary_keys = [u'年月', u'类别']
_repaid_cols = [u'实还贷前服务费', u'实还贷后服务费', u'减免贷前服务费', u'减免贷后服务费']

df34 = pd.concat([df3, df4])
gp = df34.groupby(_summary_keys)[_repaid_cols].sum().reset_index()

df = df1.merge(df2, on=_summary_keys, how='outer')
df = df.merge(gp, on=_summary_keys, how='outer')
df = df.fillna(0)

# One row per month, one column per (amount, class).
df_pivot = pd.pivot_table(
    df,
    index=u'年月',
    columns=u'类别',
    values=[u'总应收服务费', u'计提风险金总额', u'应收贷前服务费', u'应收贷后服务费'] + _repaid_cols,
    fill_value=0)
# df_pivot.to_excel(os.path.join(file_path ,  u'现金贷服务费分类汇总.xlsx'))


# ---
# # 根据明细计算EF列

# In[7]:


# Detail-level queries used to compute the E and F columns.

# One row per repayment-plan installment with its receivable pre-loan and
# post-loan service fee, restricted to loans paid out in
# [2015-01-01, 2017-01-01).
sql_detail_plan = """
select 
  t2.fund_code,
  date(t3.loan_paid_at) loan_paid_at, 
  date(t2.deadline) deadline,
  date_format(t2.deadline,'%%Y-%%m') 年月,
  t2.ref_id,
  t1.plan_id,
  t2.term_no,
  t1.service_fee1 应收贷前服务费,
  t1.service_fee2 应收贷后服务费
from
  test_mzh.user_repayment_plan_service_fee_0722 t1 
  join temp.user_repayment_plan t2 
    on t2.id = t1.plan_id 
  join temp.loan_manifest t3 
    on t3.ref_id = t2.ref_id 
    and t3.loan_paid_at >= '2015-01-01 00:00:00' 
    and t3.loan_paid_at < '2017-01-01 00:00:00';
"""
# One row per repayment record (funding_code <> 160) with repaid and waived
# pre-loan / post-loan service fees.
# NOTE(review): the date filter sits in the ON clause of a LEFT JOIN, but the
# following inner join on t1.ref_id drops every row the left join could not
# match, so the statement appears to behave like an inner join -- confirm.
sql_detail_ref = """
select 
  t1.ref_id,
  t1.plan_id,
  date(t3.repaid_at) repaid_at,
  date_format(t3.repaid_at,'%%Y-%%m') 年月,
  t1.service_fee1 实还贷前服务费,
  t1.service_fee2 实还贷后服务费,
  t1.mitigate_service_fee1 减免贷前服务费,
  t1.mitigate_service_fee2 减免贷后服务费 
from
  temp.xjd_repay_plan_repay_record_ref_repay_date t3 
  left join test_mzh.xjd_repay_plan_repay_record_ref_service_fee_0722 t1 
    on t3.xjd_ref_id = t1.xjd_ref_id 
    and t3.repaid_at >= '2015-01-01 00:00:00' 
    and t3.repaid_at < '2017-01-01 00:00:00' 
  join temp.loan_manifest t4 
    on t4.ref_id = t1.ref_id 
    and t4.funding_code <> 160 ;
"""
# Same columns as sql_detail_ref, for repayments on plans with
# fund_code = 160 (the "new MS" channel).
sql_detail_ms = """
select 
  t1.ref_id,
  t1.plan_id,
  date(t2.repaid_at) repaid_at,
  date_format(t2.repaid_at,'%%Y-%%m') 年月,
  t1.service_fee1 实还贷前服务费,
  t1.service_fee2 实还贷后服务费,
  t1.mitigate_service_fee1 减免贷前服务费,
  t1.mitigate_service_fee2 减免贷后服务费 
from
  test_mzh.new_ms_transaction_with_term_details_service_fee_0722 t1 
  join test_8_2.new_ms_transaction_with_term_details t2 
    on t2.id = t1.ms_trans_id 
    and t2.repaid_at >= '2015-01-01 00:00:00' 
    and t2.repaid_at < '2017-01-01 00:00:00' 
  join temp.user_repayment_plan t3 
    on t3.id = t1.plan_id 
    and t3.fund_code = 160 ;
"""
# Lookup from ref_id to loan_id and class.
sql_ref_class = """
select ref_id, loan_id, class from temp.loan_class
"""

# In[8]:


def _read_sql_chunked(sql, con, chunksize):
    """Run *sql* on *con*, fetching *chunksize* rows at a time.

    Returns a single DataFrame holding all chunks. The chunks are joined with
    one pd.concat call instead of DataFrame.append inside the loop, which
    re-copies the accumulated frame on every iteration.
    """
    chunks = list(pd.read_sql(sql=sql, con=con, chunksize=chunksize))
    if not chunks:
        # pd.concat refuses an empty list; keep the empty-frame result the
        # append-based accumulator produced.
        return pd.DataFrame()
    return pd.concat(chunks)


step = 10000

# Pull each detail set and cache it as CSV inside the run's output directory.
df_plan_detail = _read_sql_chunked(sql_detail_plan, engine_audit, step)
df_plan_detail.to_csv(os.path.join(file_path, 'plan_detail.csv'), index=False, encoding='utf8')

df_ref_detail = _read_sql_chunked(sql_detail_ref, engine_audit, step)
df_ref_detail.to_csv(os.path.join(file_path, 'ref_detail.csv'), index=False, encoding='utf8')

df_ms_detail = _read_sql_chunked(sql_detail_ms, engine_audit, step)
df_ms_detail.to_csv(os.path.join(file_path, 'ms_detail.csv'), index=False, encoding='utf8')

df_ref_class = _read_sql_chunked(sql_ref_class, engine_audit, step)
df_ref_class.to_csv(os.path.join(file_path, 'ref_class.csv'), index=False, encoding='utf8')

# Reload from the cached CSVs. The files live in `file_path`; the previous
# `file_path_ + name` referenced an undefined variable and had no separator.
df_plan_detail = pd.read_csv(os.path.join(file_path, 'plan_detail.csv'), encoding='utf8', low_memory=False)
df_ref_detail = pd.read_csv(os.path.join(file_path, 'ref_detail.csv'), encoding='utf8', low_memory=False)
df_ms_detail = pd.read_csv(os.path.join(file_path, 'ms_detail.csv'), encoding='utf8', low_memory=False)
df_ref_class = pd.read_csv(os.path.join(file_path, 'ref_class.csv'), encoding='utf8', low_memory=False)

# After the CSV round-trip every date column is plain text. All three are
# compared against dates further down, so parse each of them, not only
# `deadline`.
df_plan_detail['deadline'] = pd.to_datetime(df_plan_detail['deadline'])
df_plan_detail['loan_paid_at'] = pd.to_datetime(df_plan_detail['loan_paid_at'])
df_ref_detail['repaid_at'] = pd.to_datetime(df_ref_detail['repaid_at'])
df_ms_detail['repaid_at'] = pd.to_datetime(df_ms_detail['repaid_at'])
# ---
# ## EF明细
# - 老方法，忽略期数，按照订单合并，通过透视表，把每项金额变成一个矩阵，行是订单，列是月份
# - 先按行累计求和，再对应位置相减，即为每个订单的EF列
# - 用来计算EF的汇总比较方便

# In[73]:


# Split the detail rows at the two observation points, 2015-12-31 and
# 2016-12-31 (expressed as exclusive upper bounds on the following day).
_cutoff_2015 = datetime.date(2016, 1, 1)
_cutoff_2016 = datetime.date(2017, 1, 1)

# Installments selected by the date the loan was paid out ...
df_loan_detail_2015 = df_plan_detail[df_plan_detail.loan_paid_at < _cutoff_2015]
df_loan_detail_2016 = df_plan_detail[df_plan_detail.loan_paid_at < _cutoff_2016]
# ... and by the installment deadline.
df_plan_detail_2015 = df_plan_detail[df_plan_detail.deadline < _cutoff_2015]
df_plan_detail_2016 = df_plan_detail[df_plan_detail.deadline < _cutoff_2016]

# Repayments of both channels, selected by repayment date.
df_repay_detail = pd.concat([df_ref_detail, df_ms_detail])
df_repay_detail_2015 = df_repay_detail[df_repay_detail.repaid_at < _cutoff_2015]
df_repay_detail_2016 = df_repay_detail[df_repay_detail.repaid_at < _cutoff_2016]

# In[51]:


# Notebook inspection cells: these expressions only display totals / row
# counts and do not change any state.
_repaid_fee_cols = [u'实还贷前服务费', u'实还贷后服务费', u'减免贷前服务费', u'减免贷后服务费']
df_repay_detail_2016[_repaid_fee_cols].sum()

# In[54]:


_receivable_fee_cols = [u'应收贷前服务费', u'应收贷后服务费']
df_loan_detail_2016[_receivable_fee_cols].sum()

# In[55]:


df_plan_detail_2016[_receivable_fee_cols].sum()

# In[21]:


tuple(len(_frame) for _frame in (df_repay_detail, df_repay_detail_2016, df_plan_detail,
                                 df_loan_detail_2016, df_plan_detail_2016))
# Output recorded in the notebook: (2569038, 2569038, 3533573, 3533573, 2589535)


# In[74]:


# Build the monthly E/F input from the detail rows; the end-of-2016 cut is
# sufficient because it contains the earlier months as well.
_month_ref_keys = [u'年月', 'ref_id']

# Post-loan fee receivable per (month, ref_id).
gp_plan_2016 = df_plan_detail_2016.groupby(_month_ref_keys)[u'应收贷后服务费'].sum().reset_index()
# Post-loan fee repaid and waived per (month, ref_id).
gp_repay_2016 = df_repay_detail_2016.groupby(_month_ref_keys)[[u'实还贷后服务费', u'减免贷后服务费']].sum().reset_index()

# Outer join: keep months that have only a receivable or only a repayment.
df_plan_repay_2016 = gp_plan_2016.merge(gp_repay_2016, on=_month_ref_keys, how='outer')
df_plan_repay_2016 = df_plan_repay_2016.fillna(0)
len(df_plan_repay_2016), len(df_plan_detail_2016)
# Output recorded in the notebook: (2719052, 2589535)


# In[75]:


# Inspection only: total post-loan fee receivable after the merge.
df_plan_repay_2016[u'应收贷后服务费'].sum()

# In[77]:


# Repaid post-loan fee including waivers.
_settled_col = u'实还贷后服务费（包含减免）'
df_plan_repay_2016[_settled_col] = df_plan_repay_2016[u'实还贷后服务费'] + df_plan_repay_2016[u'减免贷后服务费']

# Two ref_id x month matrices: receivable and settled post-loan fee.
req_fee2 = df_plan_repay_2016.pivot('ref_id', u'年月', u'应收贷后服务费').fillna(0)
real_fee2 = df_plan_repay_2016.pivot('ref_id', u'年月', _settled_col).fillna(0)

# Running totals along the month axis, then the running gap per ref_id.
req_fee2_cum = req_fee2.cumsum(axis=1)
real_fee2_cum = real_fee2.cumsum(axis=1)
_cum_gap = req_fee2_cum - real_fee2_cum
E = np.maximum(_cum_gap, 0)  # receivable exceeds settled (gap >= 0)
F = np.minimum(_cum_gap, 0)  # settled exceeds receivable (gap < 0)
cols = E.columns

# Attach loan_id / class to every ref_id and total each month per class.
E = E.reset_index().merge(df_ref_class, on='ref_id', how='left')
F = F.reset_index().merge(df_ref_class, on='ref_id', how='left')
E_gp = E.groupby('class')[cols].sum().T
F_gp = F.groupby('class')[cols].sum().T

EF_summuary = E_gp.merge(F_gp, left_index=True, right_index=True)
# The column labels below assume exactly the classes A, B and C, in that order.
mul_cols = pd.MultiIndex.from_product(
    [[u'应收贷后-实还贷后-贷后减免>=0', u'应收贷后-实还贷后-贷后减免<0'], ['A', 'B', 'C']],
    names=[None, '类别'])
EF_summuary.columns = mul_cols

# Join with the monthly summary pivot and fix the column-group order.
all_summary = df_pivot.merge(EF_summuary, left_index=True, right_index=True)
_summary_order = [u'总应收服务费', u'计提风险金总额', u'应收贷前服务费', u'应收贷后服务费',
                  u'实还贷前服务费', u'实还贷后服务费', u'减免贷前服务费', u'减免贷后服务费',
                  u'应收贷后-实还贷后-贷后减免>=0', u'应收贷后-实还贷后-贷后减免<0']
all_summary = all_summary[_summary_order]
all_summary.to_excel(os.path.join(file_path, u'现金贷服务费分类汇总.xlsx'))

# One detail workbook per month, listing loans whose E or F is non-zero
# after rounding to two decimals.
for col in cols:
    E_sub = E[['loan_id', 'ref_id', 'class', col]].rename(columns={col: 'E'})
    F_sub = F[['ref_id', col]].rename(columns={col: 'F'})
    EF_sub = E_sub.merge(F_sub, on='ref_id')[['loan_id', 'class', 'E', 'F']]
    _has_balance = (np.round(EF_sub.E, 2) != 0) | (np.round(EF_sub.F, 2) != 0)
    EF_sub = EF_sub.loc[_has_balance]
    if not EF_sub.empty:
        EF_sub.to_excel(os.path.join(detail_path, col + '.xlsx'), index=None)

# In[34]:


# Write the summary workbook.
all_summary.to_excel(os.path.join(file_path, u'现金贷服务费分类汇总.xlsx'))
# Write one detail workbook per month, keeping only loans whose E or F is
# non-zero after rounding to two decimals.
for col in cols:
    E_sub = E[['loan_id', 'ref_id', 'class', col]].rename(columns={col: 'E'})
    F_sub = F[['ref_id', col]].rename(columns={col: 'F'})
    EF_sub = pd.merge(E_sub, F_sub, on='ref_id')
    EF_sub = EF_sub[['loan_id', 'class', 'E', 'F']]
    EF_sub = EF_sub.loc[(np.round(EF_sub.E, 2) != 0) | (np.round(EF_sub.F, 2) != 0)]
    if len(EF_sub) > 0:
        EF_sub.to_excel(os.path.join(detail_path, col + '.xlsx'), index=None)

# Append zero-filled placeholder columns for the two extra column groups
# (one column per class A/B/C each). This is done exactly once: merging the
# same placeholder frame a second time would join two frames with identical
# column labels and mangle the column index with merge suffixes.
mul_cols_1 = pd.MultiIndex.from_product([[u'每日递延', u'AR'], ['A', 'B', 'C']], names=[None, '类别'])
ar_summary = pd.DataFrame(index=all_summary.index, columns=mul_cols_1, data=0.0)
all_summary = pd.merge(all_summary, ar_summary, left_index=True, right_index=True)
all_summary
# # 出未还明细
# - 截止2015年底未还和截止2016年底未还
# Rebuild the per-cut-off subsets (unpaid as of end of 2015 / end of 2016).
# Upper bounds are exclusive: the first day after each observation date.
_end_2015 = datetime.date(2016, 1, 1)
_end_2016 = datetime.date(2017, 1, 1)

# Installments by loan pay-out date.
df_loan_detail_2015 = df_plan_detail[df_plan_detail.loan_paid_at < _end_2015]
df_loan_detail_2016 = df_plan_detail[df_plan_detail.loan_paid_at < _end_2016]
# Installments by deadline.
df_plan_detail_2015 = df_plan_detail[df_plan_detail.deadline < _end_2015]
df_plan_detail_2016 = df_plan_detail[df_plan_detail.deadline < _end_2016]
# Repayments of both channels by repayment date.
df_repay_detail = pd.concat([df_ref_detail, df_ms_detail])
df_repay_detail_2015 = df_repay_detail[df_repay_detail.repaid_at < _end_2015]
df_repay_detail_2016 = df_repay_detail[df_repay_detail.repaid_at < _end_2016]
# ## 2015年底
# - 放款时间在15年

# In[35]:


# Observation window used by everything below. The two notebook cells are
# alternatives; when the file runs top to bottom the second assignment wins,
# i.e. the end-of-2016 window is the one in effect.

# Window 1: loans paid out before 2016-01-01.
df_plan_detail_all, end_date, df_repay_detail_all = (
    df_loan_detail_2015, datetime.date(2016, 1, 1), df_repay_detail_2015)

# In[44]:


# Window 2: loans paid out before 2017-01-01.
df_plan_detail_all, end_date, df_repay_detail_all = (
    df_loan_detail_2016, datetime.date(2017, 1, 1), df_repay_detail_2016)

# In[45]:


# Unpaid detail per installment.
# Post-loan fee counts as receivable only once the installment is due:
# installments with a deadline on or after end_date get 0.
_due_col = u'应收贷后服务费deadline'
df_plan_detail_all[_due_col] = df_plan_detail_all[u'应收贷后服务费']
_not_due = df_plan_detail_all.deadline >= end_date
df_plan_detail_all.loc[_not_due, _due_col] = 0

# Repaid / waived amounts per installment within the window.
_settled_cols = [u'实还贷前服务费', u'减免贷前服务费', u'实还贷后服务费', u'减免贷后服务费']
gp_repay = df_repay_detail_all.groupby([u'plan_id'])[_settled_cols].sum().reset_index()

# Installments without any repayment keep their row, with amounts set to 0.
df_plan_repay = df_plan_detail_all.merge(gp_repay, on=[u'plan_id'], how='left')
df_plan_repay = df_plan_repay.fillna(0)

# Outstanding pre-loan and (due) post-loan fee per installment.
_pre_gap = df_plan_repay[u'应收贷前服务费'] - df_plan_repay[u'实还贷前服务费'] - df_plan_repay[u'减免贷前服务费']
_post_gap = df_plan_repay[_due_col] - df_plan_repay[u'实还贷后服务费'] - df_plan_repay[u'减免贷后服务费']

df_plan_repay['E'] = np.maximum(_post_gap, 0)  # under-settled part of the post-loan fee
df_plan_repay['F'] = np.minimum(_post_gap, 0)  # over-settled part (<= 0)
# Unpaid fee = pre-loan gap + post-loan gap with the over-settled part removed.
df_plan_repay['no_repay_fee'] = _pre_gap + (_post_gap - df_plan_repay['F'])

# Attach loan_id and class.
df_res = df_plan_repay.merge(df_ref_class, on='ref_id', how='left')
# df_res.to_csv(path + 'service_fee_no_repay_all.csv', index=None, encoding='utf8')
# df_res1 = df_res[['loan_id', 'class', 'term_no', 'deadline', u'应收贷前服务费', u'应收贷后服务费deadline', u'实还贷前服务费', u'实还贷后服务费', u'减免贷前服务费', u'减免贷后服务费', u'E', u'F', u'no_repay_fee']]
# df_res1.columns = [['loan_id', u'用户等级', '期数', '应还款日', u'应收贷前服务费', u'应收贷后服务费', u'实还贷前服务费', u'实还贷后服务费', u'减免贷前服务费', u'减免贷后服务费', u'E', u'F', u'未还服务费']]
# df_res1[u'未还服务费'] = np.round(df_res1[u'未还服务费'], 6)
# df_res2 = df_res1.loc[df_res1[u'未还服务费'] != 0]
# df_res2.to_csv(path + u'未还服务费明细.csv', index=None, encoding='utf8')
# df_res2.to_excel(path + u'未还服务费明细.xlsx', index=None)


# ## 未还服务费的后续还款

# In[37]:


# Service-fee repayments made on or after a given date for a given set of
# ref_ids, excluding plans with fund_code = 160.
# Template with two '%s' slots, filled via the % operator by the caller:
#   1. lower bound for repaid_at,
#   2. a parenthesised list of ref_ids for the IN clause.
sql_repay = """
SELECT 
  t3.ref_id,
  t3.term_no,
  t1.service_fee service_fee_repay,
  date(t2.repaid_at) repaid_at 
FROM
  new_transaction.xjd_repay_plan_repay_record_ref t1 
  JOIN temp.xjd_repay_plan_repay_record_ref_repay_date t2 
    ON t2.xjd_ref_id = t1.id 
    AND t2.repaid_at >= '%s'   
  JOIN new_transaction.user_repayment_plan t3 
    ON t3.id = t1.plan_id AND t3.fund_code <> 160
    AND t3.ref_id IN %s ;
"""

# Same output columns for plans with fund_code = 160, matched on loan id and
# term number. One '%s' slot: lower bound for repaid_at.
# NOTE: this rebinds `sql_ms`, replacing the summary query of the same name
# defined earlier in the file.
sql_ms = """
SELECT 
  t2.ref_id,
  t1.term_no,
  t1.qg_service_fee service_fee_repay,
  date(t1.repaid_at) repaid_at 
FROM
  test_8_2.new_ms_transaction_with_term_details t1 
  JOIN test_8_2.user_repayment_plan t2 ON t2.real_loan_id = t1.loan_id  AND t1.term_no = t2.term_no
  AND t1.qg_service_fee > 0
  AND t2.fund_code = 160 AND t1.repaid_at >= '%s'
"""

# In[46]:


# Fetch the repayments made on/after end_date for every ref_id of the unpaid
# detail, querying the ids in batches of `step` to keep each IN list bounded.
step = 10000
df_all = df_res
ref_ids = df_all.ref_id.drop_duplicates().astype(str).tolist()

res = []
for _start in xrange(0, len(ref_ids), step):
    _batch = tuple(ref_ids[_start:_start + step])
    # A one-element tuple prints as "('x',)"; strip the trailing comma so the
    # text is a valid SQL IN list.
    _in_list = str(_batch).replace(',)', ')')
    _query = sql_repay % (end_date, _in_list)
    res.append(pd.read_sql(sql=_query, con=engine_audit))
df_repay = pd.concat(res)

# The fund_code = 160 source is only queried for windows ending on or after
# 2016-06-01.
if end_date >= datetime.date(2016, 6, 1):
    df_ms = pd.read_sql(sql=sql_ms % end_date, con=engine_audit)
    df_repay = pd.concat([df_repay, df_ms])

# Total fee repaid after the window, per (ref_id, term_no).
gp_repay = df_repay.pivot_table(index=['ref_id', 'term_no'], values=['service_fee_repay'],
                                aggfunc=np.sum).reset_index()
# df_all_2017 = pd.merge(df_all, gp_repay, on=['ref_id', 'term_no'], how='left')
# df_all_2017.fillna(0, inplace=True)
# cond = (df_all_2017.deadline >= datetime.datetime(2017, 1, 1)) & (df_all_2017.service_fee_repay_2017 > 0)
# df_all_2017.loc[cond, 'service_fee_repay_2017'] = np.minimum(df_all_2017.loc[cond, 'service_fee_repay_2017'],
#                                                                 df_all_2017.loc[cond, u'应收贷前服务费'])
# df_all_2017.rename(columns={'no_repay_fee': u'2016年底未还服务费', 'service_fee_repay_2017': u'2017年还款的服务费'}, inplace=True)
# cols_name = ['fund_code', 'loan_id', 'class', 'term_no', 'deadline', u'应收贷前服务费', u'应收贷后服务费deadline', u'实还贷前服务费', u'实还贷后服务费', u'减免贷前服务费', u'减免贷后服务费', u'E', u'F', u'2016年底未还服务费', u'2017年还款的服务费']
# df_all_2017[u'2016年底未还服务费'] = np.round(df_all_2017[u'2016年底未还服务费'], 6)
# df_all_2017.to_csv(path + u'2017还款服务费.csv', index=None, encoding='utf8')
# df_all_2017.loc[df_all_2017[u'2016年底未还服务费'] <> 0, cols_name].to_excel(path + u'2017年还款的服务费.xlsx', index=None)


# In[47]:


_term_keys = ['ref_id', 'term_no']

# For each (ref_id, term_no) keep the repaid_at of the last repayment row and
# pair it with the summed fee from gp_repay. The join must be restricted to
# the two key columns: without `on`, merge also matches on the shared
# `service_fee_repay` column and silently drops every term whose last single
# payment differs from its total (i.e. every term repaid in several parts).
df_repay1 = df_repay.drop_duplicates(subset=_term_keys, keep='last')[_term_keys + ['repaid_at']]
df_repay1 = pd.merge(df_repay1, gp_repay, on=_term_keys)

# Attach the after-period repayment to every installment; none -> 0.
df_all_after = pd.merge(df_all, df_repay1, on=_term_keys, how='left')
df_all_after.fillna(0, inplace=True)

# For installments that were not yet due at end_date, cap the after-period
# repayment at the pre-loan fee receivable.
cond = (df_all_after.deadline >= end_date) & (df_all_after.service_fee_repay > 0)
df_all_after.loc[cond, 'service_fee_repay'] = np.minimum(df_all_after.loc[cond, 'service_fee_repay'],
                                                         df_all_after.loc[cond, u'应收贷前服务费'])
df_all_after.rename(columns={'no_repay_fee': u'未还服务费', 'service_fee_repay': u'期后还款的服务费'}, inplace=True)
cols_name = ['fund_code', 'loan_id', 'class', 'term_no', 'deadline', 'repaid_at', u'应收贷前服务费', u'应收贷后服务费deadline',
             u'实还贷前服务费', u'实还贷后服务费', u'减免贷前服务费', u'减免贷后服务费', u'E', u'F', u'未还服务费', u'期后还款的服务费']
# The "unpaid" workbook carries the same columns minus the after-period
# repayment. Select them explicitly instead of deleting the column and then
# asking to_excel for a column that no longer exists.
_no_repay_cols = [name for name in cols_name if name != u'期后还款的服务费']

# Keep only installments with a non-zero unpaid fee (rounded to 6 decimals).
df_all_after[u'未还服务费'] = np.round(df_all_after[u'未还服务费'], 6)
df_all_after_sub = df_all_after.loc[df_all_after[u'未还服务费'] != 0]

excel_file_no_repay = pd.ExcelWriter(path=os.path.join(file_path, u'截止%s底未还的服务费.xlsx' % (end_date.year - 1)))
excel_file_repay_after = pd.ExcelWriter(path=os.path.join(file_path, u'截止%s未还的服务费的期后还款服务费.xlsx' % (end_date.year - 1)))

# Split the rows across sheets of at most max_len rows each.
max_len = 1000000
len_all = 0
for i in xrange(0, len(df_all_after_sub), max_len):
    tmp = df_all_after_sub[i:i + max_len]
    _sheet = '%d-%d' % (i, i + max_len)
    tmp.to_excel(excel_writer=excel_file_repay_after, sheet_name=_sheet, index=None, columns=cols_name)
    tmp.to_excel(excel_writer=excel_file_no_repay, sheet_name=_sheet, index=None, columns=_no_repay_cols)
    len_all += len(tmp)
excel_file_repay_after.save()
excel_file_no_repay.save()
# Sanity check: rows written vs. rows selected.
len_all, len(df_all_after_sub)

# In[49]:


# Inspection only: column totals of the after-period frame.
_total_cols = [u'应收贷前服务费', u'应收贷后服务费', u'应收贷后服务费deadline',
               u'实还贷前服务费', u'实还贷后服务费', u'减免贷前服务费', u'减免贷后服务费',
               u'未还服务费', u'期后还款的服务费']
df_all_after[_total_cols].sum()

# In[63]:


# Scratch arithmetic kept from the notebook.
# NOTE(review): presumably the difference between two totals read off an
# earlier output -- the source of the two figures is not recorded.
13200431.05999 - 11390688.20999

# ### 按loan_id合并统计出表

# In[ ]:


# The frames formerly called df_all_2017 / df_all_2017_sub are now built as
# df_all_after / df_all_after_sub; the old names are no longer defined, so
# the diagnostics below use the current ones.
df_all_after_sub

# ### Find the differences

# In[67]:


E.head()

# In[97]:


# Cumulative E and F per ref_id as of one month (from the month matrices).
# NOTE(review): the month is hard-coded to '2015-12'; it should correspond to
# the month just before `end_date` for the comparison to be meaningful --
# confirm which window these cells are meant for.
a = pd.merge(E[['ref_id', '2015-12']], F[['ref_id', '2015-12']], on='ref_id')

# In[98]:


# E and F per ref_id obtained by summing the per-installment values.
b = df_all_after.groupby('ref_id')[['E', 'F']].agg(sum).reset_index()
b.head()

# In[99]:


len(a), len(b)

# In[100]:


# Side-by-side comparison; '_x' is the E matrix column, '_y' the F one.
c = pd.merge(a, b)
c['diff1'] = np.round(c['2015-12_x'] - c['E'], 2)
c['diff2'] = np.round(c['2015-12_y'] - c['F'], 2)

# In[101]:


c.loc[c.diff2 != 0]

# In[107]:


c.loc[c.diff1 != 0]['diff2'].sum()

# In[103]:


# Drill into a single ref_id that shows a difference.
pd.concat([req_fee2.loc[2292523].to_frame().T, real_fee2.loc[2292523].to_frame().T])

# In[104]:


E.loc[E.ref_id == 2292523]

# In[105]:


F.loc[F.ref_id == 2292523]

# In[106]:


df_all_after.loc[df_all_after.ref_id == 2292523].T


# In[ ]:
