# -*- encoding: utf8 -*-

"""
白条非去哪儿收入确认表
"""
from __future__ import unicode_literals
import os
import copy
import datetime
import numpy as np
import pandas as pd

from db_con.db_connect import sql_engine

# Switch off pandas' chained-assignment warning for the whole script.
pd.set_option('mode.chained_assignment', None)
# Show floats with five decimal places whenever a frame is printed.
pd.options.display.float_format = lambda value: '%.5f' % value

# Database engines obtained from the project's sql_engine helper.
mysql = sql_engine('audit', 'audit', False).get_engine()
mysql_analysis = sql_engine('analysis', 'analysis_tmp', False).get_engine()

# Row cap per file; in the visible code only the disabled chunked CSV export
# near the end of the file refers to it.
max_limit = 900000

# Output directory for every report written by this script.
file_path = 'E:/审计出表/白条/收入确认表'
if not os.path.exists(file_path):
    os.makedirs(file_path)
sql_loan_plan = """SELECT
  bo.order_no            产品编号,
  bo.ref_id,
  fc.fund_name           资金方,
  bo.loan_paid_at        放款时间,
  year(bo.loan_paid_at)  年,
  month(bo.loan_paid_at) 月,
  bo.funding_loan_amount 放款金额,
  bo.is_active,
  bo.contract_term       总期数,
  ubrp.deadline          应还日,
  ubrp.term_no           当前期数,
  ubrp.principle         应还本金,
  ubrp.interest          应还服务费
FROM baitiao_audit.baitiao_order bo
  JOIN baitiao_audit.fund_corp fc
    ON fc.id = bo.funding_corp_id AND bo.funding_corp_id = 5 AND bo.is_active = 1 AND bo.loan_paid_at < '2017-10-01'
  JOIN new_transaction.user_bt_repayment_plan ubrp ON ubrp.ref_id = bo.ref_id
UNION ALL
SELECT
  bo.order_no               产品编号,
  bo.ref_id,
  fc.fund_name              资金方,
  bo.train_cancel_at        放款时间,
  year(bo.train_cancel_at)  年,
  month(bo.train_cancel_at) 月,
  bo.funding_loan_amount    放款金额,
  bo.is_active,
  bo.contract_term          总期数,
  ubrp.deadline             应还日,
  ubrp.term_no              当前期数,
  ubrp.principle            应还本金,
  ubrp.interest             应还服务费
FROM baitiao_audit.baitiao_order bo
  JOIN baitiao_audit.fund_corp fc
    ON fc.id = bo.funding_corp_id AND bo.funding_corp_id = 5 AND bo.is_active = -2 AND
       bo.train_cancel_at < '2017-10-01'
  JOIN new_transaction.user_bt_repayment_plan ubrp ON ubrp.ref_id = bo.ref_id; 
"""

sql_class = """
SELECT ref_id, class 风险等级 FROM analysis_tmp.nk_bt_order_all_0930 WHERE ref_id IN {}
"""

sql_ratio = """
SELECT class 风险等级, year 年, month 月, bad_debt_ratio 计提风险金比例 FROM test_mzh.bt_service_fee_ratio
"""

# Pull the repayment plan in 10,000-row chunks and stitch the chunks together.
df_loan_gen = pd.read_sql(sql=sql_loan_plan, con=mysql, chunksize=10000)
df_loan = pd.concat(list(df_loan_gen))
# The allocation loop further down walks the rows order by order, term by
# term, so the instalments of one order must be contiguous and in term order.
df_loan.sort_values(by=['ref_id', '当前期数'], inplace=True)
df_ratio = pd.read_sql(sql=sql_ratio, con=mysql)

# Fetch the risk class of every order, `step` ref_ids per query, to keep each
# IN (...) list at a bounded size.
step = 5000
ref_ids = tuple(df_loan.ref_id.astype(str).unique())
res = []
# range() rather than xrange(): xrange does not exist on Python 3, and the
# number of batches is small enough that the Python 2 list is harmless.
for i in range(0, len(ref_ids), step):
    # Build the IN list explicitly instead of patching the tuple's repr, which
    # breaks as soon as the repr is not a bare quoted string (u'..' prefixes,
    # numpy scalar reprs). The ids originate from our own query result.
    in_list = '({})'.format(', '.join("'{}'".format(ref) for ref in ref_ids[i:i + step]))
    res.append(pd.read_sql(sql=sql_class.format(in_list), con=mysql_analysis))
df_class = pd.concat(res)
df_class['ref_id'] = df_class['ref_id'].astype(int)
df_loan = pd.merge(df_loan, df_class, on='ref_id', how='left')
# Diagnostics: number of orders requested vs. orders that have a risk class,
# followed by the ref_ids whose risk class is missing.
print('{}, {}'.format(len(ref_ids), df_class.ref_id.nunique()))
print(tuple(df_loan.loc[df_loan['风险等级'].isnull(), 'ref_id'].astype(str)))

# NOTE(review): this is pandas' default inner join, so rows without a matching
# (risk class, year, month) ratio are dropped from everything that follows.
df_loan = pd.merge(df_loan, df_ratio, on=['风险等级', '年', '月'])
# Provision per order = loan amount * provision ratio, rounded to cents.
df_loan['计提风险金'] = np.round(df_loan['放款金额'] * df_loan['计提风险金比例'], 2)
df_loan['应还服务费'] = np.round(df_loan['应还服务费'], 2)

# Sanity print: principal and service fee due per funding party and month.
print(df_loan.groupby([df_loan['资金方'], df_loan['放款时间'].dt.month])[['应还本金', '应还服务费']].agg('sum').reset_index())
col_names = list(df_loan.columns)
# Resolve the working columns by name instead of the hard-coded positions
# 1, 6, 12 and 15, so a change in column order cannot silently pick the
# wrong field.
idx_ref_id = col_names.index('ref_id')
idx_loan_amount = col_names.index('放款金额')
idx_fee = col_names.index('应还服务费')
idx_provision = col_names.index('计提风险金')
col_names.extend(['assets', 'assets_remain'])
need_process_list = df_loan.values.tolist()
processed_list = []


def _consume_provision(remain, fee, loan_amount):
    """Settle one instalment's service fee against the remaining provision.

    Returns ``(asset, new_remain)``. For a positive loan amount the asset is
    ``min(remain, fee)`` and the balance is floored at 0; otherwise the signs
    are mirrored (``max`` and a ceiling of 0).
    """
    if loan_amount > 0:
        return min(remain, fee), max(0, remain - fee)
    return max(remain, fee), min(0, remain - fee)


# Walk the instalments (sorted by ref_id, then term). Each order starts with
# its full provision; every instalment consumes its OWN service fee from the
# balance. The previous implementation overwrote `fee` with the next
# instalment's fee before settling the current one, so the first fee of an
# order was never used and the last one was used twice.
all_remain = 0
ref_id = None
loan_amount = 0
remain = 0
row_count = len(need_process_list)
for pos, row in enumerate(need_process_list):
    if pos == 0 or row[idx_ref_id] != ref_id:
        # First instalment of a new order: reset the running balance.
        ref_id = row[idx_ref_id]
        loan_amount = row[idx_loan_amount]
        remain = row[idx_provision]
        all_remain += remain
    asset, remain = _consume_provision(remain, row[idx_fee], loan_amount)
    is_last_of_order = pos + 1 == row_count or need_process_list[pos + 1][idx_ref_id] != ref_id
    # Only the last instalment of an order carries the unconsumed balance.
    processed_list.append(row + [asset, remain if is_last_of_order else 0])

df_res = pd.DataFrame(processed_list, columns=col_names)
print(df_res[['assets', 'assets_remain']].sum())
# Balance check: total provision - allocated - left over should be ~0.
print('{} - {} - {} = {}'.format(all_remain, df_res['assets'].sum(), df_res['assets_remain'].sum(),
                                 all_remain - df_res['assets'].sum() - df_res['assets_remain'].sum()))
# Reduce both timestamp columns to plain dates so they can be compared with
# the datetime.date cut-off below.
for date_col in ('放款时间', '应还日'):
    df_res[date_col] = df_res[date_col].dt.date
# Cut-off date read by cal_debt_age.
view_day = datetime.date(2017, 9, 30)


def cal_debt_age(x, as_of=None):
    """Return the account age of one instalment in calendar months.

    The age is the difference in (year, month) between the loan date
    ``x['放款时间']`` and the later of the due date ``x['应还日']`` and the
    cut-off date; the day of month is ignored.

    :param x: mapping or row holding ``'放款时间'`` and ``'应还日'`` as dates.
    :param as_of: cut-off date; defaults to the module-level ``view_day``.
    :return: number of months as an int.
    """
    if as_of is None:
        # Resolved at call time so existing one-argument callers keep
        # using the script-wide cut-off.
        as_of = view_day
    date_end = max(x['应还日'], as_of)
    date_start = x['放款时间']
    return (date_end.year - date_start.year) * 12 + (date_end.month - date_start.month)


# Account age in months for every instalment row.
df_res['账龄'] = df_res[['放款时间', '应还日']].apply(cal_debt_age, axis=1)

# Orders whose provision was not fully consumed by their service fees.
asset_remain = df_res[df_res['assets_remain'] != 0]
remain_path = os.path.join(file_path, '非去哪儿asset_remain.xlsx')
asset_remain[['产品编号', '风险等级', '放款时间', '应还服务费', 'assets_remain']].to_excel(remain_path, index=False)

# Due month as 'YYYY-MM', used as a grouping key in both summaries.
df_res['应还年月'] = df_res['应还日'].map(lambda due: due.strftime('%Y-%m'))

# Summary 1: allocated amount by term number, due month and risk class.
gp1 = df_res.groupby(['当前期数', '应还年月', '风险等级'])['assets'].sum().reset_index()
gp1_path = os.path.join(file_path, '非去哪儿汇总（期数-应还年月-风险等级）.xlsx')
gp1.to_excel(gp1_path, index=False)

# Summary 2: allocated amount by account age, due month and risk class.
gp2 = df_res.groupby(['账龄', '应还年月', '风险等级'])['assets'].sum().reset_index()
gp2_path = os.path.join(file_path, '非去哪儿汇总（账龄-应还年月-风险等级）.xlsx')
gp2.to_excel(gp2_path, index=False)

# Full detail export, GBK-encoded.
details_path = os.path.join(file_path, '非去哪儿assets减值-details.csv')
df_res.to_csv(details_path, index=False, encoding='gbk')

# Disabled alternative: split the detail export into files of max_limit rows.
# max_limit = 900000
# for i in xrange(0, len(df_res), max_limit):
#     df_res[i:i + max_limit].to_csv(os.path.join(file_path, '非去哪儿assets减值-details_%d.csv' % (i / max_limit + 1)), index=None,
#                                    encoding='utf8')

# Total loan amount: keep the amount on the first row of each order only, so
# that summing over instalment rows counts every order once.
repeated_order_rows = df_res['ref_id'].duplicated()
df_res.loc[repeated_order_rows, '放款金额'] = 0
print('{}'.format(df_res['放款金额'].sum()))
