# coding: utf-8

# In[1]:


import sys

# Put the directory two levels above the working directory at the front of the
# module search path; adjust the relative path to match your checkout.
# NOTE(review): presumably required so that `from api import db_connect` below
# resolves -- confirm against the project layout.
sys.path.insert(0, '../..')

# In[2]:


import pandas as pd
import numpy as np
import datetime
import os
from api import db_connect

# `reload` is a builtin only on Python 2. On Python 3.4+ it lives in
# `importlib`; on Python 2 this import fails and the builtin is used instead.
try:
    from importlib import reload
except ImportError:
    pass

# Re-import the database connection helper so that edits made to it since the
# kernel started take effect.
reload(db_connect)

# Silence pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None
# Render floats with five decimal places in displayed frames.
pd.set_option('display.float_format', lambda x: '%.5f' % x)

# Engine for the `temp` schema (source tables are read through it).
engine_audit = db_connect.sql_engine(db_connect.audit, 'temp', False).get_engine()
# Engine for the `test_mzh` schema (ratio table is read from it, results are written to it).
engine_mzh = db_connect.sql_engine(db_connect.audit, 'test_mzh', False).get_engine()

# In[3]:


# Timestamp of this run; its YYYYMMDD form names the per-run output directory.
today = datetime.datetime.now()
date_str = today.strftime('%Y%m%d')

# Directory the main program is run from.
base_path = os.getcwd()
# Output directory for this run.
file_path = os.path.join(base_path, date_str)
# Directory for intermediate (temporary) files, nested inside the output directory.
tmp_path = os.path.join(file_path, 'temp')

# Create whichever of the two directories does not exist yet.
for required_dir in (file_path, tmp_path):
    if not os.path.exists(required_dir):
        os.makedirs(required_dir)

# ### 读取放款表、还款表的数据，包含类别、放款金额、期数、总的服务费

# In[34]:


# Query returning one row per repayment-plan entry: `temp.loan_manifest` is
# inner-joined to `temp.loan_class` and `temp.user_repayment_plan` on `ref_id`.
# Each row carries the loan's class, contract amount, contract term, the year
# and month of `loan_paid_at`, the loan-level `all_service_fee`, and the
# plan-level `term_no` and `service_fee`.
sql_plan = """
select 
  lm.id lm_id,
  lm.ref_id,
  lc.class,
  lm.loan_id,
  lm.contract_loan_amount,
  lm.contract_term,
  year(lm.loan_paid_at) year,
  month(lm.loan_paid_at) month,
  lm.all_service_fee,
  urp.id plan_id,
  urp.term_no,
  urp.service_fee
from
  temp.loan_manifest lm 
join temp.loan_class lc 
    on lc.ref_id = lm.ref_id 
join temp.user_repayment_plan urp 
    on urp.ref_id = lm.ref_id;
"""

# ### 说明
# - 为了避免在拆分服务费的时候，放款表和还款计划表拆分不一致，不对两个表分别拆分，只拆还款计划，然后汇总得到放款表拆分，同时也可以减少等待的时间

# In[35]:


# Number of rows fetched per chunk.
step = 10000
# Stream the query result in chunks of `step` rows instead of one large fetch.
df_plan_gen = pd.read_sql(sql=sql_plan, con=engine_audit, chunksize=step)
# Collect the chunks and concatenate once. Appending inside the loop re-copies
# the accumulated frame on every chunk, and `DataFrame.append` no longer exists
# in pandas 2.x.
plan_chunks = list(df_plan_gen)
# `pd.concat` rejects an empty list, so fall back to an empty frame when the
# query yields no chunks.
df_plan = pd.concat(plan_chunks) if plan_chunks else pd.DataFrame()
# Cache the result on disk so later runs can skip the database round-trip.
df_plan.to_csv(os.path.join(tmp_path, 'plan_class.csv'), index=None, encoding='utf8')

# ### 从文件读取

# In[4]:


# Load the repayment-plan rows from the CSV cache in the temporary directory.
plan_cache_file = os.path.join(tmp_path, 'plan_class.csv')
df_plan = pd.read_csv(plan_cache_file, encoding='utf8')
# Chunk size for later database writes; defined here as well so this cell can
# be used without running the database-read cell.
step = 10000
df_plan.head()

# ### 读取拆分比例
# - 需事先根据财务需求，准备好服务费的比例表，最好每次都新建一个表，以日期为后缀区分开

# In[5]:


# Columns needed from the ratio table: the join keys plus the two ratios.
rate_columns = ['class', 'year', 'month', 'bad_debt_ratio', 'service_fee1_ratio']
df_rate = pd.read_sql_table('service_fee_ratio_0722', con=engine_mzh, columns=rate_columns)
df_rate.head(1)

# ### 1. 拆分总的应收贷前和贷后
# - 筛选出15年和16年的
# - 如果有类别为空的（失败订单），可以自己定义其类别

# In[6]:


# Attach the ratios to every plan row; a left join keeps plan rows that have
# no matching (class, year, month) ratio.
df_plan_rate = df_plan.merge(df_rate, how='left', on=['class', 'year', 'month'])
# Keep only loans paid out in 2015 or 2016.
paid_in_scope = df_plan_rate['year'].isin([2015, 2016])
df_plan_rate = df_plan_rate.loc[paid_in_scope]
# Display the rows that found no ratio, for manual inspection.
df_plan_rate.loc[df_plan_rate['bad_debt_ratio'].isnull()]

# ### 服务费拆分计算规则
# - 计提风险金
# \begin{align}
# \text{计提风险金} = \text{放款金额} \times \text{计提风险金比例}
# \end{align}
# - 应收贷前，当计提风险金超过总服务费的时候，应收贷前=总服务费
# \begin{align}
# \text{应收贷前} = \min\left\{ \left( \text{总服务费} - \text{计提风险金} \right) \times \text{贷前服务费比例} + \text{计提风险金}, \text{总服务费} \right\}
# \end{align}
#
# \begin{align}
# \text{应收贷后} = \left( \text{总服务费} - \text{计提风险金} \right) \times \text{贷后服务费比例}
# = \text{总服务费} - \text{应收贷前}
# \end{align}
# - 失败订单，总应收服务费为0，不存在贷前和贷后的拆分
# - 存在拆分贷后服务费为0的情况，也即贷前服务费大于总服务费，此时贷前就是全部的服务费，贷后为0

# In[ ]:


# NOTE: this cell used to hold an unexecuted draft of the pre-loan fee split.
# It read the `all_service_fee1` and `bad_debt_fee` columns before either one
# is created, so it raised on a fresh kernel (Restart & Run All). Its row masks
# were also inconsistent: `>= 0` / `< 0` on the left-hand side, `> 0` on the
# right-hand side. The working computation is in the following cell, so the
# draft has been reduced to this note.

# In[10]:


# Normalise the total service fee to cents.
df_plan_rate['all_service_fee'] = np.round(df_plan_rate['all_service_fee'], 2)
# Risk provision = contract loan amount x bad-debt ratio, rounded to cents.
df_plan_rate['bad_debt_fee'] = np.round(df_plan_rate['contract_loan_amount'] * df_plan_rate['bad_debt_ratio'], 2)

total_fee = df_plan_rate['all_service_fee']
risk_fee = df_plan_rate['bad_debt_fee']
# Uncapped pre-loan fee: the ratio share of what is left after the risk
# provision, plus the provision itself.
uncapped_fee1 = np.round((total_fee - risk_fee) * df_plan_rate['service_fee1_ratio'] + risk_fee, 2)

# Non-negative totals: the pre-loan fee may not exceed the total fee.
non_negative_total = total_fee >= 0
df_plan_rate.loc[non_negative_total, 'all_service_fee1'] = np.minimum(uncapped_fee1, total_fee)
# Negative totals: the bound flips, so the pre-loan fee may not fall below the total fee.
negative_total = total_fee < 0
df_plan_rate.loc[negative_total, 'all_service_fee1'] = np.maximum(uncapped_fee1, total_fee)

# The post-loan fee is whatever remains of the total.
df_plan_rate['all_service_fee2'] = df_plan_rate['all_service_fee'] - df_plan_rate['all_service_fee1']

# ### 2. 拆分每期的应收贷前和贷后
# - 还款计划的贷前（贷后）是总的贷前（贷后）平分在每期上，但是需要注意最后一期的调整，保证总额一致

# In[14]:


# Spread the total pre-loan fee evenly over the contract terms, rounded to cents.
even_share = df_plan_rate['all_service_fee1'] / df_plan_rate['contract_term']
df_plan_rate['service_fee1'] = np.round(even_share, 2)

# The final installment absorbs the rounding remainder so that the per-term
# amounts add back up to the loan-level total.
con1 = df_plan_rate['term_no'] == df_plan_rate['contract_term']
earlier_terms_total = df_plan_rate.loc[con1, 'service_fee1'] * (df_plan_rate.loc[con1, 'contract_term'] - 1)
df_plan_rate.loc[con1, 'service_fee1'] = df_plan_rate.loc[con1, 'all_service_fee1'] - earlier_terms_total

# Per-term post-loan fee is the remainder of that term's service fee.
df_plan_rate['service_fee2'] = df_plan_rate['service_fee'] - df_plan_rate['service_fee1']

# Loans whose total service fee is zero have nothing to split.
zero_total = df_plan_rate['all_service_fee'] == 0
split_columns = ['all_service_fee1', 'all_service_fee2', 'service_fee1', 'service_fee2']
df_plan_rate.loc[zero_total, split_columns] = 0

# ### 检查是否出现金额不一致的

# In[15]:


# Sum the per-term pre-/post-loan fees for each loan.
gp = df_plan_rate.groupby('ref_id')[['service_fee1', 'service_fee2']].agg('sum').reset_index().rename(
    columns={'service_fee1': 'sum_1', 'service_fee2': 'sum_2'})
# `ref_id` is the only column the two frames share; naming it keeps the join
# key explicit.
df_plan_rate_test = df_plan_rate.merge(gp, on='ref_id')

# Rows whose loan-level fee differs from the sum of its per-term fees at cent
# precision. `!=` replaces the Python-2-only `<>` operator, which is a
# SyntaxError on Python 3.
fee1_mismatch = np.round(df_plan_rate_test.all_service_fee1 - df_plan_rate_test.sum_1, 2) != 0
fee2_mismatch = np.round(df_plan_rate_test.all_service_fee2 - df_plan_rate_test.sum_2, 2) != 0
# Displayed as (pre-loan mismatch count, post-loan mismatch count).
len(df_plan_rate_test.loc[fee1_mismatch]), len(df_plan_rate_test.loc[fee2_mismatch])

# In[16]:


# Derive the loan-level split from the plan-level frame: take the loan
# columns, rename the loan totals to the plain fee names, and keep one row per
# `ref_id`.
loan_level_columns = ['class', 'lm_id', 'ref_id', 'loan_id', 'bad_debt_ratio', 'service_fee1_ratio',
                      'bad_debt_fee', 'all_service_fee1', 'all_service_fee2']
df_loan_rate_from_plan = (
    df_plan_rate[loan_level_columns]
    .rename(columns={'all_service_fee1': 'service_fee1', 'all_service_fee2': 'service_fee2'})
    .drop_duplicates('ref_id')
)
# Round both fees to cents.
for fee_column in ('service_fee2', 'service_fee1'):
    df_loan_rate_from_plan[fee_column] = np.round(df_loan_rate_from_plan[fee_column], 2)
# Display the loan-level totals.
df_loan_rate_from_plan[['service_fee1', 'service_fee2']].sum()

# In[17]:


# Round the per-term fees to cents, then display their totals.
for fee_column in ('service_fee2', 'service_fee1'):
    df_plan_rate[fee_column] = np.round(df_plan_rate[fee_column], 2)
df_plan_rate[['service_fee1', 'service_fee2']].sum()

# In[19]:


# Manual reconciliation using hard-coded figures: each expression adds two
# amounts and subtracts 548194552.88, so a value near zero means they agree.
# NOTE(review): the literals were presumably pasted from the `.sum()` outputs
# of the two preceding cells in an earlier run, and will be stale whenever the
# data changes -- confirm, and consider computing them from the frames instead.
531276708.44740 + 16917844.43015 - 548194552.880000, 531276708.44839 + 16917844.43004 - 548194552.880000

# ### 写入到数据库
# - 每次新建一个表，保留之前的结果，注意修改表名

# In[21]:


# Rows per INSERT batch.
step = 10000

# Loan-level result: rename the ratio columns to the names used in the output
# table, then write.
# NOTE(review): `if_exists='append'` means re-running this cell adds the same
# rows again -- change the table name per run, as the heading above says.
df_loan_rate_from_plan = df_loan_rate_from_plan.rename(
    columns={'service_fee1_ratio': 'fee1_rate', 'bad_debt_ratio': 'bad_debt_rate'})
df_loan_rate_from_plan.to_sql('loan_manifest_service_fee_0722', con=engine_mzh, chunksize=step,
                              if_exists='append', index=None)

# Plan-level result: keep only the identifying columns and the two split fees.
plan_output_columns = ['class', 'plan_id', 'ref_id', 'loan_id', 'service_fee1', 'service_fee2']
df_plan_rate = df_plan_rate[plan_output_columns]
df_plan_rate.to_sql('user_repayment_plan_service_fee_0722', con=engine_mzh, chunksize=step,
                    if_exists='append', index=None)

# Also keep a CSV copy of the per-plan split in the temporary directory.
plan_fee_csv = os.path.join(tmp_path, 'plan_service_fee.csv')
df_plan_rate[['plan_id', 'service_fee1', 'service_fee2']].to_csv(plan_fee_csv, index=None, encoding='utf8')


# In[ ]:
