# -*- coding: utf-8 -*-
import sys
import numpy as np
import pandas as pd
import datetime
from sqlalchemy import create_engine
reload(sys)
sys.setdefaultencoding('utf8')


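'''
Expand the compensation (peifu) calculation per loan_id.
Note: first run 导出数据_2016.sql to export the data into the corresponding files.
Files involved: loan_{year}.csv  fund_{year}.csv plan_{year}.csv ACD_{year}.csv
repay_online_{year}.csv repay_offline_{year}.csv
fund_kaola_{year}.csv fund_kjmx_{year}.csv
hr_repay_online_{year}.csv hr_repay_offline_{year}.csv
kjmx_repay_online_{year}.csv kjmx_repay_offline_{year}.csv

Notes:
1. Kaola: amounts due to the funder on 2017-01-01 are counted in 2016.
2. Juzi Licai, Xiaoying Licai, Koudai Licai and Mizhuang Licai: amounts due from 2017-01-01 to 2017-01-02 are settled into 2016.
3. Huarong: actual repayments made from 2017-01-01 to 2017-01-10 for instalments that were due in 2016 are counted in 2016.
4. Jinshang: amounts due to the funder on 2016-12-30 and 2016-12-31 are set to 0.
5. Because the due dates of Kaola (T+1) and of Juzi/Xiaoying/Koudai/Mizhuang Licai (T+2) are moved 1 or 2 days earlier, the actual repayments of these five funders must likewise be moved into 2016.
'''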



# SQLAlchemy engine for the MySQL database `all_back_0630`; echo=True makes the
# engine log the statements it issues.
# NOTE(review): the connection URL embeds a user name and password in source --
# consider loading it from configuration or the environment instead.
# NOTE(review): `engine_new_transaction` is not referenced anywhere else in the
# visible part of this file.
engine_new_transaction = create_engine(
    'mysql+mysqldb://yulong_rw:TouBStYwN8wkdxVt@172.16.3.201:3306/all_back_0630?charset=utf8',
    echo=True)

# Directory prefix for the exported input CSVs; the result CSV is written
# under the same prefix.
path='E:/peifu/2016/'


# Reporting year; substituted into every CSV file name.
year=2016
# First day after the reporting year as a 'YYYY-MM-DD' string; compared
# against the raw fund_deadline strings read from the CSVs.
time = '2017-01-01'

# Load the loans and the funder repayment schedule exported for `year`.
df_pay = pd.read_csv(path + 'loan_%d.csv' % year)
df_fund = pd.read_csv(path + 'fund_%d.csv' % year)
# Rows with funding codes 0-5 are excluded from the schedule.
# (.loc replaces the deprecated .ix indexer; for boolean masks they are equivalent.)
df_fund = df_fund.loc[~df_fund.funding_code.isin([0, 1, 2, 3, 4, 5])]
if year == 2016:
    df_plan = pd.read_csv(path + 'plan_%d.csv' % year)
    df_acd = pd.read_csv(path + 'ACD_%d.csv' % year)
    df_kaola = pd.read_csv(path + 'fund_kaola_%d.csv' % year)
    # T+1 handling: Kaola instalments due on or after `time` are booked on 2016-12-31.
    df_kaola.loc[df_kaola.fund_deadline >= time, 'fund_deadline'] = datetime.date(2016, 12, 31)
    # T+2 handling: same shift for the rows of fund_kjmx_<year>.csv.
    df_kjxm = pd.read_csv(path + 'fund_kjmx_%d.csv' % year)
    df_kjxm.loc[df_kjxm.fund_deadline >= time, 'fund_deadline'] = datetime.date(2016, 12, 31)
    df_fund = pd.concat([df_fund, df_plan, df_acd, df_kaola, df_kjxm], ignore_index=True)
df_fund.fillna(0, inplace=True)
# ref_id may print as e.g. '123.0' (presumably because the column became float
# after the concat/fillna); drop the '.0' text before casting to int.
df_fund.ref_id = df_fund.ref_id.apply(lambda x: str(x).replace('.0', ''))
df_fund.ref_id = df_fund.ref_id.astype(int)
df_fund.funding_code = df_fund.funding_code.astype(int)
df_fund.term_no = df_fund.term_no.astype(int)
# Compute the amount due to each funder.
df_fund.fund_deadline = pd.to_datetime(df_fund.fund_deadline).dt.date
# Keep only instalments due before 2017-01-01. Copy so that the assignment
# below modifies an independent frame rather than a slice of df_fund.
df_fund_plan = df_fund.loc[df_fund.fund_deadline < datetime.date(2017, 1, 1)].copy()
# Jinshang (funding_code 180): amounts due on 2016-12-30 and 2016-12-31 count as 0.
# The previous condition tested fund_deadline >= 2017-01-01, which can never
# match after the filter above, so the rule was silently skipped.
df_fund_plan.loc[(df_fund_plan.funding_code == 180) &
                 (df_fund_plan.fund_deadline >= datetime.date(2016, 12, 30)),
                 'fund_prin_inst'] = 0
# One row per (loan, term, funder, due day).
df_fund_plan = df_fund_plan.groupby(
    ['ref_id', 'term_no', 'funding_code', 'fund_deadline'])['fund_prin_inst'].sum().reset_index()
df_fund_plan.ref_id = df_fund_plan.ref_id.astype(int)
df_fund_plan.term_no = df_fund_plan.term_no.astype(int)
df_fund_plan.fund_deadline = pd.to_datetime(df_fund_plan.fund_deadline).dt.date
# Attach the per-loan total as sum_fund_prin_inst (merge joins on the shared ref_id column).
_fund_total_by_ref = df_fund_plan.groupby(['ref_id'])['fund_prin_inst'].sum().reset_index() \
    .rename(columns={'fund_prin_inst': 'sum_fund_prin_inst'})
df_fund_plan = df_fund_plan.merge(_fund_total_by_ref)
# Customer repayments -- repayment time in 2016 (online + offline exports).
df_online_repay = pd.read_csv(path + 'repay_online_%d.csv' % year)
df_offline_repay = pd.read_csv(path + 'repay_offline_%d.csv' % year)
df_repay = pd.concat([df_online_repay, df_offline_repay], ignore_index=True)
df_repay = df_repay.loc[df_repay.fund_code != 160]
# Keep only repayments of loans present in df_pay; copy so the in-place
# assignments below do not target a slice.
df_repay = df_repay.loc[df_repay.ref_id.isin(df_pay.ref_id)].copy()
# Kaola (fund_code 170, T+1): deadlines from 2017-01-02 on do not count;
# a deadline of 2017-01-01 is booked on 2016-12-31.
# `deadline` still holds the raw 'YYYY-MM-DD' strings here, hence string comparison.
df_repay.loc[(df_repay.fund_code == 170) & (df_repay.deadline >= '2017-01-02'), 'real_prin_inst'] = 0
df_repay.loc[(df_repay.fund_code == 170) & (df_repay.deadline == '2017-01-01'), 'deadline'] = '2016-12-31'
# T+2 funders (fund codes 230/240/250/260; original comment: Juzi, Xiaoying, Mizhuang):
# deadlines from 2017-01-03 on do not count; deadlines of 2017-01-01 and
# 2017-01-02 are booked on 2016-12-31.
# The shift is bounded below by 2017-01-01: the previous condition
# (deadline < '2017-01-03' only) also rewrote every 2016 deadline to 2016-12-31.
_t2_rows = df_repay.fund_code.isin([230, 240, 250, 260])
df_repay.loc[_t2_rows & (df_repay.deadline >= '2017-01-03'), 'real_prin_inst'] = 0
df_repay.loc[_t2_rows & (df_repay.deadline >= '2017-01-01') & (df_repay.deadline < '2017-01-03'),
             'deadline'] = '2016-12-31'
# All funders except fund_code 150 (presumably Huarong -- confirm): anything
# still due on or after 2017-01-01 does not count.
df_repay.loc[(df_repay.fund_code != 150) & (df_repay.deadline >= '2017-01-01'), 'real_prin_inst'] = 0
# Huarong -- must be booked using the funder repayment date.

# Huarong overdue repayments.
df_hr_online_repay = pd.read_csv(path + 'hr_repay_online_%d.csv' % year)
df_hr_offline_repay = pd.read_csv(path + 'hr_repay_offline_%d.csv' % year)
# Kaola / Juzi / Xiaoying / Mizhuang repayments of 2017-01-01 .. 2017-01-02.
# File names follow `year` like every other input (previously hard-coded to 2016).
df_kjxm_online_repay = pd.read_csv(path + 'kjmx_repay_online_%d.csv' % year)
df_kjxm_offline_repay = pd.read_csv(path + 'kjmx_repay_offline_%d.csv' % year)
df_kjxm_repay = pd.concat([df_kjxm_online_repay, df_kjxm_offline_repay], ignore_index=True)
# From here on both columns hold datetime.date objects, so every comparison
# below must use datetime.date values, not strings.
df_kjxm_repay.deadline = pd.to_datetime(df_kjxm_repay.deadline).dt.date
df_kjxm_repay.repaid_at = pd.to_datetime(df_kjxm_repay.repaid_at).dt.date
# Kaola (fund_code 170, T+1): repayments made from 2017-01-02 on do not count;
# repayments made on 2017-01-01 are booked on 2016-12-31; rows whose deadline
# is still after 2017-01-02 do not count.
df_kjxm_repay.loc[(df_kjxm_repay.fund_code == 170) &
                  (df_kjxm_repay.repaid_at >= datetime.date(2017, 1, 2)), 'real_prin_inst'] = 0
df_kjxm_repay.loc[(df_kjxm_repay.fund_code == 170) &
                  (df_kjxm_repay.repaid_at == datetime.date(2017, 1, 1)), 'deadline'] = datetime.date(2016, 12, 31)
df_kjxm_repay.loc[(df_kjxm_repay.fund_code == 170) &
                  (df_kjxm_repay.deadline > datetime.date(2017, 1, 2)), 'real_prin_inst'] = 0
# Xiaoying / Juzi / Mizhuang / Koudai Licai (T+2): deadlines from 2017-01-03 on do not count.
# The cutoff is a datetime.date (the column no longer holds strings), and the
# statement appears once -- the original repeated the identical line twice.
# NOTE(review): the duplicated line may have been meant to move 2017-01-01/02
# deadlines to 2016-12-31, mirroring the Kaola rule above -- confirm.
df_kjxm_repay.loc[(df_kjxm_repay.fund_code.isin([230, 240, 250, 260])) &
                  (df_kjxm_repay.deadline >= datetime.date(2017, 1, 3)), 'real_prin_inst'] = 0

# Combine all repayment sources into one frame and normalise the key columns.
df_repay = pd.concat([df_repay, df_hr_online_repay, df_hr_offline_repay, df_kjxm_repay],
                     ignore_index=True)
df_repay.term_no = df_repay.term_no.astype(int)
df_repay.ref_id = df_repay.ref_id.astype(int)
# Funders other than 150/29/50/80/130 are booked on the customer deadline.
# .copy() makes this an independent frame so the column assignments below
# (and later in-place updates) do not operate on a slice of df_repay.
df_repay_other = df_repay.loc[~df_repay.fund_code.isin([150, 29, 50, 80, 130])].copy()
df_repay_other.deadline = pd.to_datetime(df_repay_other.deadline).dt.date
df_repay_other.repaid_at = pd.to_datetime(df_repay_other.repaid_at).dt.date
# =================== TODO 本次修改的内容 begin
# df_repay_other_tq=df_repay_other.ix[df_repay_other.deadline > df_repay_other.repaid_at]
# df_repay_other_tq.ix[ (df_repay_other_tq.deadline == datetime.date(2016,2,1)) & (
#     df_repay_other_tq.repaid_at <= datetime.date(2016,1,31)
# ),'deadline'] = datetime.date(2016, 1, 31)
# df_repay_other_tq.ix[ (df_repay_other_tq.deadline == datetime.date(2016,5,1)
# ) & (
#     df_repay_other_tq.repaid_at <= datetime.date(2016,4,30)
# ),'deadline'] = datetime.date(2016, 4, 30)
#
# df_repay_other_tq.ix[ (
#    df_repay_other_tq.deadline >= datetime.date(2016,7,1)
# ) & (
#     df_repay_other_tq.deadline <= datetime.date(2016,7,5)
# ) & (
#     df_repay_other_tq.repaid_at <= datetime.date(2016,6,30)
# ),'deadline'] = datetime.date(2016, 6, 30)
#
# df_repay_other_tq.ix[ (
#    df_repay_other_tq.deadline >= datetime.date(2016,10,1)
# ) & (
#     df_repay_other_tq.deadline <= datetime.date(2016,10,15)
# ) & (
#     df_repay_other_tq.repaid_at <= datetime.date(2016,9,30)
# ),'deadline'] = datetime.date(2016, 9, 30)
# df_repay_other_yq=df_repay_other.ix[df_repay_other.deadline < df_repay_other.repaid_at]
# df_repay_other_yq['deadline'] = df_repay_other_yq['repaid_at']
# df_repay_other = pd.concat([df_repay_other_tq,df_repay_other_yq],ignore_index=True)
# Repayments made after their deadline are booked on the day they were repaid.
# The mask is computed once and both sides use .loc (replaces the deprecated
# .ix indexer and the chained [...]['repaid_at'] lookup).
# NOTE(review): rows whose deadline was earlier moved to 2016-12-31 but whose
# repaid_at lies in 2017 also match this mask and get repaid_at back as their
# deadline -- confirm that this is intended.
_repaid_late = df_repay_other.deadline < df_repay_other.repaid_at
df_repay_other.loc[_repaid_late, 'deadline'] = df_repay_other.loc[_repaid_late, 'repaid_at']
# =================== TODO 本次修改的内容 end


#df_repay_other=df_repay_other.groupby(['deadline','fund_code'])['real_prin_inst'].sum().reset_index().rename(columns={'deadline':'day_hk'})
# Funders 150/29/50/80/130 are booked on the funder's own deadline.
df_repay_fund_deadline = df_repay.loc[df_repay.fund_code.isin([150, 29, 50, 80, 130])]
# Attach fund_deadline from the funder schedule.
# NOTE(review): if df_fund holds more than one row per (ref_id, term_no), this
# inner merge repeats the repayment row once per match -- confirm uniqueness.
df_repay_fund_deadline = pd.merge(df_repay_fund_deadline,
                                  df_fund[['ref_id', 'term_no', 'fund_deadline']],
                                  on=['ref_id', 'term_no'], how='inner')
# Keep instalments due to the funder before 2017-01-01; copy so the
# assignments below do not target a slice.
df_repay_fund_deadline = df_repay_fund_deadline.loc[
    df_repay_fund_deadline.fund_deadline < datetime.date(2017, 1, 1)].copy()
df_repay_fund_deadline.fund_deadline = pd.to_datetime(df_repay_fund_deadline.fund_deadline).dt.date
df_repay_fund_deadline.repaid_at = pd.to_datetime(df_repay_fund_deadline.repaid_at).dt.date

# Repayments made late but still before 2017-01-01 are booked on the day they
# were repaid; later ones keep the funder deadline.
_late_in_year = ((df_repay_fund_deadline.fund_deadline < df_repay_fund_deadline.repaid_at) &
                 (df_repay_fund_deadline.repaid_at < datetime.date(2017, 1, 1)))
df_repay_fund_deadline.loc[_late_in_year, 'fund_deadline'] = \
    df_repay_fund_deadline.loc[_late_in_year, 'repaid_at']

# =================== TODO 本次修改的内容 begin
# df_repay_fund_deadline_tq=df_repay_fund_deadline.ix[df_repay_fund_deadline.fund_deadline > df_repay_fund_deadline.repaid_at]
# df_repay_fund_deadline_tq.ix[(
#    df_repay_fund_deadline_tq.fund_deadline == datetime.date(2016,2,1)
# ) & (
#     df_repay_fund_deadline_tq.repaid_at <= datetime.date(2016,1,31)
# ),'fund_deadline'] = datetime.date(2016, 1, 31)
# df_repay_fund_deadline_tq.ix[ (
#    df_repay_fund_deadline_tq.fund_deadline == datetime.date(2016,5,1)
# ) & (
#     df_repay_fund_deadline_tq.repaid_at <= datetime.date(2016,4,30)
# ),'fund_deadline'] = datetime.date(2016, 4, 30)
#
# df_repay_fund_deadline_tq.ix[ (
#     df_repay_fund_deadline_tq.fund_deadline >= datetime.date(2016,7,1)
# ) & (
#     df_repay_fund_deadline_tq.fund_deadline <= datetime.date(2016,7,5)
# ) & (
#     df_repay_fund_deadline_tq.repaid_at <= datetime.date(2016,6,30)
# ),'fund_deadline'] = datetime.date(2016, 6, 30)
#
# df_repay_fund_deadline_tq.ix[ (
#      df_repay_fund_deadline_tq.fund_deadline >= datetime.date(2016,10,1)
# ) & (
#     df_repay_fund_deadline_tq.fund_deadline <= datetime.date(2016,10,15)
# ) & (
#     df_repay_fund_deadline_tq.repaid_at <= datetime.date(2016,9,30)
# ),'fund_deadline'] = datetime.date(2016, 9, 30)
# =================== TODO 本次修改的内容 end
# TODO ====华融逾期还款提前 2017.1-10 的数据 按照 deadline 进行统计即可 其他按照还款时间进行统计
# df_repay_fund_deadline_yq=df_repay_fund_deadline.ix[df_repay_fund_deadline.fund_deadline < df_repay_fund_deadline.repaid_at]
# df_repay_fund_deadline_yq.ix[(
#     df_repay_fund_deadline_yq.fund_code != 150
# ),'fund_deadline']= \
#     df_repay_fund_deadline_yq.ix[
#      (df_repay_fund_deadline_yq.fund_code != 150)]['repaid_at']
# df_repay_fund_deadline=pd.concat([df_repay_other_tq,df_repay_other_yq],ignore_index=True)


# Book these rows on the funder deadline: move the fund_deadline values into
# `deadline` and remove the fund_deadline column in one step.
df_repay_fund_deadline['deadline'] = df_repay_fund_deadline.pop('fund_deadline')
# df_repay_fund_deadline=df_repay_fund_deadline.groupby(['fund_deadline','fund_code'])['real_prin_inst'].sum().reset_index().rename(columns={'fund_deadline':'day_hk'})

# All actual repayments, each carrying the day it is booked on in `deadline`.
_repay_parts = [df_repay_other, df_repay_fund_deadline]
df_repay_real = pd.concat(_repay_parts, ignore_index=True)


# df_repay_real.ref_id=df_repay_real.ref_id.astype(int)
# df_repay_real.term_no = df_repay_real.term_no.astype(int)
# df_repay_real=df_repay_real.merge(df_repay_real.groupby(['ref_id'])['real_prin_inst'].sum().reset_index().rename(columns={'real_prin_inst':'sum_real_prin_inst'}))

#比对数据
# df=pd.merge(df_pay,df_fund_plan,on='ref_id',how='left')
# df=pd.merge(df_fund_plan,df_repay_real,on=['ref_id','term_no'],how='outer')
# df.fillna(0,inplace=True)
# df['diff']=np.round(df.sum_fund_prin_inst - df.sum_real_prin_inst,2)

# df=df.ix[df['diff'] != 0]
# df=df.ix[(df['diff'] > 0.5) | (df['diff'] < -0.5)]

# df_fund_plan = df.ix[df.fund_prin_inst != 0]
# df_fund_plan.drop_duplicates(['ref_id','term_no'],inplace=True)
# Collapse the funder schedule to one total per (funder, due day) and rename
# the keys to the names shared with the repayment side.
_plan_daily = df_fund_plan.groupby(['funding_code', 'fund_deadline'])['fund_prin_inst'].sum()
_plan_daily = _plan_daily.reset_index()
df_fund_plan = _plan_daily.rename(columns={'funding_code': 'fund_code', 'fund_deadline': 'day_hk'})
df_fund_plan['fund_code'] = df_fund_plan['fund_code'].astype(int)
df_fund_plan['day_hk'] = pd.to_datetime(df_fund_plan['day_hk']).dt.date

# df_repay=df.ix[df.real_prin_inst != 0]
# df_repay.drop_duplicates(['ref_id','term_no','real_prin_inst','repaid_at'],inplace=True)
# Collapse the actual repayments to one total per (funder, booking day).
_repay_daily = df_repay_real.groupby(['fund_code', 'deadline'])['real_prin_inst'].sum()
_repay_daily = _repay_daily.reset_index()
df_repay = _repay_daily.rename(columns={'deadline': 'day_hk'})
df_repay['fund_code'] = df_repay['fund_code'].astype(int)
df_repay['day_hk'] = pd.to_datetime(df_repay['day_hk']).dt.date

# Line up scheduled and actual amounts per (funder, day); a day present on only
# one side gets 0 for the other.
df = df_fund_plan.merge(df_repay, how='outer', on=['fund_code', 'day_hk'])
df = df.fillna(0)
# peifu = scheduled amount minus actually repaid amount, rounded to 2 decimals.
_shortfall = df['fund_prin_inst'] - df['real_prin_inst']
df['peifu'] = np.round(_shortfall, 2)

# Write the result next to the inputs, without the index column.
_out_file = path + 'peifu_%d.csv' % year
df.to_csv(_out_file, index=None)





