# -*- encoding: utf8 -*-
import sys
import os
import datetime
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Python 2 idiom: reload sys so that setdefaultencoding is callable again,
# then make utf-8 the process-wide default codec for implicit str/unicode
# conversions (the source paths and log messages below contain Chinese text).
reload(sys)
sys.setdefaultencoding("utf-8")
# Silence pandas' chained-assignment (SettingWithCopy) warnings globally.
pd.options.mode.chained_assignment = None
# Directory holding the Qunar source statement files; every loader below
# scans this directory for the requested month.
path = u'E:/量化派/去哪儿/去哪儿账单文件/201710'

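'''
@author linfang-----
1. Import the Qunar loan (disbursement), repayment and refund details into the qunaer_new database.
2. The imported totals must be reconciled against the amounts in the Qunar account; if they do not match, investigate the cause.
3. Funding source: factoring before 2017-08-22 15:00:00; Huijinsuo before 2017-09-26 15:00:00; Harbin Bank afterwards.
4. Note: when repayments and refunds are loaded, fund_code is initialised from the fund_code stored in pay_detail.
'''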
# Shared connection to the qunaer_new MySQL database. It is opened once at
# import time and used by every loader in this file.
# NOTE(review): the fallback URL below embeds real credentials in source.
# Set the QUNAER_DB_URL environment variable to override it, then rotate the
# password and drop the literal once all runners have been migrated.
engine_qunaer = create_engine(
    os.environ.get(
        'QUNAER_DB_URL',
        'mysql+mysqldb://linfang.wang:#jkl3453YUGuo99@172.16.3.201:3306/qunaer_new?charset=utf8'),
    echo=False).connect()

# Import Qunar loan disbursement (pay) records.
# insert_db: True appends the cleaned rows to the pay_detail table.
def load_pay(year, month, insert_db=False):
    """Parse one month of ``*LIANGHP_payInfo.txt`` files into a cleaned frame.

    Every regular file under the module-level ``path`` whose full path
    contains ``YYYY-MM`` for the requested month and ends with
    ``LIANGHP_payInfo.txt`` is read. Blank lines and lines starting with
    ``H|`` (header) or ``F|`` (footer) are skipped; every other line is
    split on ``|`` and the fields are taken by position.

    Args:
        year: Calendar year of the statement month.
        month: Calendar month (1-12) of the statement month.
        insert_db: When true, append the result to ``pay_detail`` through
            the module-level ``engine_qunaer`` connection.

    Returns:
        DataFrame with columns product_no, trans_time, loan_time,
        loan_amount, stages, user_no, rate, status, is_ptf and fund_code.
        Only rows with status 1 are kept, one row per product_no (the last
        occurrence wins). An empty frame with the same columns is returned,
        and nothing is inserted, when no data line is found.

    Raises:
        IndexError: A data line has fewer ``|``-separated fields than needed.
        ValueError: A numeric or date field cannot be converted.
    """
    columns = ['product_no', 'trans_time', 'loan_time', 'loan_amount', 'stages',
               'user_no', 'rate', 'status', 'is_ptf']
    month_start = datetime.date(year, month, 1)
    this_month = month_start.strftime('%Y-%m')
    print(this_month)
    # The rate column (field 10) only exists in files from 2017-01 onwards.
    has_rate = month_start >= datetime.date(2017, 1, 1)

    rows = []
    # Sorted so that "keep the last duplicate" below does not depend on the
    # arbitrary order os.listdir happens to return.
    for filename in sorted(os.listdir(path)):
        f = os.path.join(path, filename)
        if not (os.path.isfile(f) and this_month in f and f.endswith('LIANGHP_payInfo.txt')):
            continue
        with open(f) as handle:
            for line in handle:
                line = line.strip()
                if not line or line.startswith(('H|', 'F|')):
                    continue
                fields = line.split('|')
                rate = fields[10] if has_rate else 0
                # trans_time mirrors loan_time for plain loans; is_ptf
                # (converted-to-installment flag) is always 0 here.
                rows.append([fields[2], fields[3], fields[3], fields[4], fields[5],
                             fields[7], rate, fields[6], 0])
    print(len(rows))
    if not rows:
        return pd.DataFrame(columns=columns + ['fund_code'])

    df = pd.DataFrame(rows, columns=columns)
    # Keep successful disbursements only.
    df['status'] = df['status'].astype(int)
    df = df.loc[df['status'] == 1].copy()
    # Drop duplicated orders, keeping the last occurrence.
    df = df.drop_duplicates(subset=['product_no'], keep='last')
    # A stage count of 0 is stored as 1.
    df['stages'] = df['stages'].astype(int).replace(0, 1)
    df['user_no'] = df['user_no'].astype(np.int64)
    df['loan_time'] = pd.to_datetime(df['loan_time'])
    df['trans_time'] = pd.to_datetime(df['trans_time'])
    # Rates arrive as percentages such as "1.5%"; store them as fractions.
    df['rate'] = df['rate'].apply(lambda x: float(str(x).strip('%')) / 100)
    df['loan_amount'] = df['loan_amount'].astype(float)
    # Funding source: 1 by default; 2 (Huijinsuo) for single-stage loans after
    # 2017-08-22 15:00; 3 (Harbin Bank) for everything after 2017-09-26 15:00.
    df['fund_code'] = 1
    df.loc[(df['trans_time'] > datetime.datetime(2017, 8, 22, 15, 0, 0)) & (df['stages'] == 1), 'fund_code'] = 2
    df.loc[df['trans_time'] > datetime.datetime(2017, 9, 26, 15, 0, 0), 'fund_code'] = 3
    # Discard rows without a product number.
    df = df.loc[~(df['product_no'] == '')]
    df = df.loc[~df['product_no'].isnull()]
    if insert_db:
        df.to_sql('pay_detail', engine_qunaer, if_exists='append', index=None, chunksize=10000)
    return df

# Import pay-to-installment conversion ("trans") records.
def load_trans(year, month, insert_db=False):
    """Parse one month of ``*_LIANGHP_transInfo.txt`` files.

    Every regular file under the module-level ``path`` whose full path
    contains ``YYYY-MM`` for the requested month and ends with
    ``_LIANGHP_transInfo.txt`` is read. Blank lines and ``H|`` / ``F|``
    lines are skipped. The last ``|`` field of a data line is a
    comma-separated list of the original order numbers that were converted.

    Args:
        year: Calendar year of the statement month.
        month: Calendar month (1-12) of the statement month.
        insert_db: When true, append the orders to ``pay_detail`` and the
            new-to-old order links to ``trans_ref`` through the module-level
            ``engine_qunaer`` connection.

    Returns:
        DataFrame of successful (status 1) conversion orders, one row per
        product_no (first occurrence wins), with columns product_no,
        loan_time, trans_time, loan_amount, stages, status, user_no, rate,
        is_ptf and fund_code. loan_time, trans_time and user_no are left as
        the raw strings read from the file.

    Raises:
        IndexError: A data line has fewer ``|``-separated fields than needed.
        ValueError: A numeric field cannot be converted.
    """
    month_start = datetime.date(year, month, 1)
    this_month = month_start.strftime('%Y-%m')
    print(this_month)
    # The rate column (field 12) only exists in files from 2017-01 onwards.
    has_rate = month_start >= datetime.date(2017, 1, 1)

    orders = []
    links = []
    # Sorted so that de-duplication does not depend on os.listdir order.
    for filename in sorted(os.listdir(path)):
        f = os.path.join(path, filename)
        if not (os.path.isfile(f) and this_month in f and f.endswith('_LIANGHP_transInfo.txt')):
            continue
        with open(f) as handle:
            for line in handle:
                line = line.strip()
                if not line or line.startswith(('H|', 'F|')):
                    continue
                fields = line.split('|')
                rate = fields[12] if has_rate else 0
                # The trailing 1 is is_ptf: every row here is a conversion.
                orders.append([fields[2], fields[3], fields[4], fields[6], fields[7],
                               fields[8], fields[9], rate, 1])
                # Links are collected for every parsed line, before the
                # status filter applied to the orders below.
                for old_product_no in fields[-1].split(','):
                    links.append([fields[2], old_product_no])

    # Conversion orders for the requested month.
    df = pd.DataFrame(orders, columns=['product_no', 'loan_time', 'trans_time', 'loan_amount',
                                       'stages', 'status', 'user_no', 'rate', 'is_ptf'])
    # A stage count of 0 is stored as 1.
    df['stages'] = df['stages'].astype(int).replace(0, 1)
    # Rates arrive as percentages such as "1.2%"; store them as fractions.
    df['rate'] = df['rate'].apply(lambda x: float(str(x).strip('%')) / 100)
    df['status'] = df['status'].astype(int)
    df['loan_amount'] = df['loan_amount'].astype(float)
    # TODO: only the internal-guarantee fund (fund_code 1) supports conversion
    # to installments today; unclear whether other funders will later.
    df['fund_code'] = 1
    # Keep successful rows that carry a product number.
    df = df.loc[df['status'] == 1]
    df = df.loc[~(df['product_no'] == '')]
    print('----转分期数据删除前长度---- %d' % len(df))
    df = df.drop_duplicates(subset=['product_no'])
    print('----转分期数据删除后长度---- %d' % len(df))
    df_ref = pd.DataFrame(links, columns=['trans_no', 'product_no'])
    print('----转分期新旧订单关联数据删除前长度---- %d' % len(df_ref))
    df_ref = df_ref.drop_duplicates(subset=['trans_no', 'product_no'])
    print('----转分期新旧订单关联数据删除后长度---- %d' % len(df_ref))
    if insert_db:
        df.to_sql('pay_detail', engine_qunaer, index=None, if_exists='append', chunksize=10000)
        df_ref.to_sql('trans_ref', engine_qunaer, index=None, if_exists='append', chunksize=10000)
    return df

# Import refund records.
# insert_db: True appends the cleaned rows to the refund_detail table.
# fund_code is initialised from the matching rows in pay_detail.
def load_refund(year, month, insert_db=False):
    """Parse one month of ``*_LIANGHP_refundInfo.txt`` files.

    Every regular file under the module-level ``path`` whose full path
    contains ``YYYY-MM`` for the requested month and ends with
    ``_LIANGHP_refundInfo.txt`` is read. Blank lines and ``H|`` / ``F|``
    lines are skipped. Only refunds with status ``'S'`` are kept (stored as
    refund_status 1), one row per refund_no. The success filter runs before
    de-duplication so that a non-successful record seen first cannot discard
    a later successful record with the same refund_no.

    The four amount columns are converted to float, negated and rounded to
    two decimals. fund_code is looked up in ``qunaer_new.pay_detail`` by
    product_no with a parameterized query and left-merged onto the result.

    Args:
        year: Calendar year of the statement month.
        month: Calendar month (1-12) of the statement month.
        insert_db: When true, append the result to ``refund_detail`` through
            the module-level ``engine_qunaer`` connection.

    Returns:
        DataFrame with columns product_no, refund_no, refund_time,
        refund_status, refund_amount, user_no, refund_principle,
        refund_fee_amount, refund_due_amount and fund_code. When there is no
        successful refund, an empty frame with these columns is returned
        without touching the database.

    Raises:
        IndexError: A data line has fewer ``|``-separated fields than needed.
        ValueError: An amount field cannot be converted to float.
    """
    columns = ['product_no', 'refund_no', 'refund_time', 'refund_status', 'refund_amount',
               'user_no', 'refund_principle', 'refund_fee_amount', 'refund_due_amount']
    amount_columns = ['refund_amount', 'refund_principle', 'refund_fee_amount', 'refund_due_amount']
    month_start = datetime.date(year, month, 1)
    this_month = month_start.strftime('%Y-%m')
    print(this_month)
    # The principal / fee / overdue breakdown (fields 8-10) only exists in
    # files from 2017-01 onwards.
    has_breakdown = month_start >= datetime.date(2017, 1, 1)

    rows = []
    # Sorted so that de-duplication does not depend on os.listdir order.
    for filename in sorted(os.listdir(path)):
        f = os.path.join(path, filename)
        if not (os.path.isfile(f) and this_month in f and f.endswith('_LIANGHP_refundInfo.txt')):
            continue
        with open(f) as handle:
            for line in handle:
                line = line.strip()
                if not line or line.startswith(('H|', 'F|')):
                    continue
                fields = line.split('|')
                if has_breakdown:
                    breakdown = [fields[8], fields[9], fields[10]]
                else:
                    breakdown = [0, 0, 0]
                rows.append([fields[1], fields[2], fields[3], fields[5], fields[4],
                             fields[6]] + breakdown)

    df = pd.DataFrame(rows, columns=columns)
    # Successful refunds with a refund number only, then one row per number.
    df = df.loc[(df['refund_status'] == 'S') & ~(df['refund_no'] == '')]
    df = df.drop_duplicates(subset=['refund_no'])
    if df.empty:
        # Nothing to look up or insert; an empty IN () list is invalid SQL.
        return df.reindex(columns=columns + ['fund_code'])

    df['refund_status'] = 1
    # Refund amounts are stored as negative numbers.
    for col in amount_columns:
        df[col] = (0 - df[col].astype(float)).round(2)

    # Initialise fund_code from pay_detail. Values are bound as parameters
    # (DB-API "format" style, as used by MySQLdb) instead of being pasted
    # into the SQL text, which also keeps a single-value list valid.
    product_nos = df['product_no'].astype(str).unique().tolist()
    placeholders = ', '.join(['%s'] * len(product_nos))
    sql_pay = ('select product_no,fund_code from qunaer_new.pay_detail '
               'where product_no in (' + placeholders + ')')
    df_pay = pd.read_sql(sql_pay, engine_qunaer, params=product_nos)
    df = pd.merge(df, df_pay, on='product_no', how='left')
    if insert_db:
        df.to_sql('refund_detail', engine_qunaer, if_exists='append', index=None, chunksize=10000)
    return df

# Import repayment details.
def load_repay(year, month, insert_db=False):
    """Parse one month of ``*_LIANGHP_repaymentInfo.txt`` files.

    Every regular file under the module-level ``path`` whose full path
    contains ``YYYY-MM`` for the requested month and ends with
    ``_LIANGHP_repaymentInfo.txt`` is read. Blank lines and ``H|`` / ``F|``
    lines are skipped. Only repayments with status ``'S'`` are kept (stored
    as repay_status 1), one row per repay_no (first occurrence wins), and
    rows with an empty repay_no are dropped.

    fund_code is looked up in ``qunaer_new.pay_detail`` by product_no with a
    parameterized query and left-merged onto the result.

    Args:
        year: Calendar year of the statement month.
        month: Calendar month (1-12) of the statement month.
        insert_db: When true, append the result to ``repay_detail`` through
            the module-level ``engine_qunaer`` connection.

    Returns:
        DataFrame with columns product_no, repay_no, repay_time,
        repay_status, repay_type, current_stage_no, repay_amount, user_no,
        repay_principle, repay_fee_amount, repay_due_amount and fund_code.
        current_stage_no is int and repay_amount is float; the remaining
        file fields are left as read. When there is no successful repayment,
        an empty frame with these columns is returned without touching the
        database.

    Raises:
        IndexError: A data line has fewer ``|``-separated fields than needed.
        ValueError: current_stage_no or repay_amount cannot be converted.
    """
    columns = ['product_no', 'repay_no', 'repay_time', 'repay_status', 'repay_type',
               'current_stage_no', 'repay_amount', 'user_no', 'repay_principle',
               'repay_fee_amount', 'repay_due_amount']
    month_start = datetime.date(year, month, 1)
    this_month = month_start.strftime('%Y-%m')
    print(this_month)
    # The principal / fee / overdue breakdown (fields 10-12) only exists in
    # files from 2017-01 onwards.
    has_breakdown = month_start >= datetime.date(2017, 1, 1)

    rows = []
    # Sorted so that de-duplication does not depend on os.listdir order.
    for filename in sorted(os.listdir(path)):
        f = os.path.join(path, filename)
        if not (os.path.isfile(f) and this_month in f and f.endswith('_LIANGHP_repaymentInfo.txt')):
            continue
        with open(f) as handle:
            for line in handle:
                line = line.strip()
                if not line or line.startswith(('H|', 'F|')):
                    continue
                fields = line.split('|')
                if has_breakdown:
                    breakdown = [fields[10], fields[11], fields[12]]
                else:
                    breakdown = [0, 0, 0]
                rows.append([fields[1], fields[2], fields[3], fields[7], fields[4],
                             fields[5], fields[6], fields[8]] + breakdown)

    df = pd.DataFrame(rows, columns=columns)
    # Keep successful repayments only and store the status as 1.
    df = df.loc[df['repay_status'] == 'S'].copy()
    df['current_stage_no'] = df['current_stage_no'].astype(int)
    df['repay_amount'] = df['repay_amount'].astype(float)
    df['repay_status'] = 1
    print('还款数据删除重复前长度----- %d' % len(df))
    df = df.drop_duplicates(subset=['repay_no'])
    print('还款数据删除重复后长度----- %d' % len(df))
    # Discard rows without a repayment number.
    df = df.loc[~(df['repay_no'] == '')]
    if df.empty:
        # Nothing to look up or insert; an empty IN () list is invalid SQL.
        return df.reindex(columns=columns + ['fund_code'])

    # Initialise fund_code from pay_detail. Values are bound as parameters
    # (DB-API "format" style, as used by MySQLdb) instead of being pasted
    # into the SQL text, which also keeps a single-value list valid.
    product_nos = df['product_no'].astype(str).unique().tolist()
    placeholders = ', '.join(['%s'] * len(product_nos))
    sql_pay = ('select product_no,fund_code from qunaer_new.pay_detail '
               'WHERE product_no in (' + placeholders + ')')
    df_pay = pd.read_sql(sql_pay, engine_qunaer, params=product_nos)
    df = pd.merge(df, df_pay, on='product_no', how='left')
    if insert_db:
        df.to_sql('repay_detail', engine_qunaer, if_exists='append', index=None, chunksize=10000)
    return df

# Manual entry point: set the statement month, then uncomment the loader(s)
# to run. Each commented pair runs one import with insert_db=True and prints
# the column total for reconciliation.
if __name__ == '__main__':
    year = 2017
    month = 10
    # Import loan (pay) data -- mind the file-name suffix and the fund_code
    # rules in load_pay: files whose name contains "external" are
    # external-guarantee data and need the fund source adjusted accordingly.
    # df=load_pay(year,month,insert_db=True)
    # print df['loan_amount'].sum()
    # Import pay-to-installment conversion data
    # df = load_trans(year,month,insert_db=True)
    # print df['loan_amount'].sum()
    # Import refund data
    # df = load_refund(year, month, insert_db=True)
    # print df['refund_amount'].sum()
    # Import repayment data
    # df = load_repay(year,month, insert_db=True)
    # print df['repay_amount'].sum()