Commit 606f16a1 authored by 白条—徐加哲 's avatar 白条—徐加哲

格式化 跑2015, 2016年 白条未还本金, 比较差异.

parent d3c14359
# -*- coding: utf-8 -*-
#
# Created by xujiazhe on 2017/7/28.
#
import sys
import pandas as pd
reload(sys)
sys.setdefaultencoding('utf8')
"""
对比 大的excel的结果
事先列对好
按照行, 或者结果的对比
跑完之后, 原来结果拉进来直接读
按照关键列排序
"""
# Compare two "outstanding principal" workbooks order by order and write every
# row that is missing on one side, or whose compared columns differ, to a third
# workbook.

# Workbooks to compare (earlier reference result vs. freshly generated result)
# and the destination of the difference report.
old_path = '/opt/duizhang_tools/audit_sort/linfang/未还本金_2016---非去哪儿最新.xlsx'
new_path = '/Users/xujiazhe/Desktop/data/未还本金_2016.xlsx'
diff_res_path = '/Users/xujiazhe/Desktop/data/compare.xlsx'

# Column whose value identifies a row in both workbooks.
key_columns = u'订单号'
# Columns that must agree for a matched order to count as unchanged.
compare_columns = [u'资金方', u'商户', u'订单金额', u'合同期数', u'放款时间', u'未还本金']


def _same_or_both_missing(frame, column):
    """Return a boolean Series marking rows where `column` agrees on both sides.

    `frame` is the merged table, in which the old value lives in
    `<column>_x` and the new value in `<column>_y`. A plain `==` reports
    NaN != NaN, which would flag cells that are empty in both workbooks as
    differences, so two missing values are treated as equal here.
    """
    left = frame[column + u'_x']
    right = frame[column + u'_y']
    return (left == right) | (left.isnull() & right.isnull())


old_df = pd.read_excel(old_path)
new_df = pd.read_excel(new_path)
# Possible improvement: compare straight after the generating program has run,
# because re-reading large workbooks is slow.

# Drop rows of the old workbook that carry no order number (the original
# script applies this filter to the old workbook only).
old_df = old_df.loc[~old_df[key_columns].isnull()]

# Order numbers that occur in exactly one of the two workbooks.
diff_order_nos = set(old_df[key_columns].unique()) ^ set(new_df[key_columns].unique())

old_df = old_df.sort_values(by=[key_columns])
new_df = new_df.sort_values(by=[key_columns])

# The merge below pairs rows by order number, so a repeated order number is
# cross-joined with its counterparts. The original script computed per-key
# counts but discarded the result; make the check visible instead.
for label, frame in (('old', old_df), ('new', new_df)):
    if frame[key_columns].duplicated().any():
        print('WARNING: duplicated order numbers in the %s workbook' % label)

# Rows that exist on one side only.
old_add = old_df.loc[old_df[key_columns].isin(diff_order_nos)]
new_add = new_df.loc[new_df[key_columns].isin(diff_order_nos)]

# Rows whose order number exists on both sides.
old_df = old_df.loc[~old_df[key_columns].isin(diff_order_nos)]
new_df = new_df.loc[~new_df[key_columns].isin(diff_order_nos)]

# Overlapping column names receive the pandas default suffixes _x (old) and
# _y (new).
df = old_df.merge(new_df, on=key_columns)

con_same = _same_or_both_missing(df, compare_columns[0])
for column in compare_columns[1:]:
    con_same = con_same & _same_or_both_missing(df, column)
cmp_diff_df = df[~con_same]

old_diff = old_df.loc[old_df[key_columns].isin(cmp_diff_df[key_columns])]
new_diff = new_df.loc[new_df[key_columns].isin(cmp_diff_df[key_columns])]

# Report layout: changed rows (old, then new) followed by one-sided rows
# (old, then new).
diff_res_df = pd.concat([old_diff, new_diff, old_add, new_add])
diff_res_df.to_excel(diff_res_path, index=False)
......@@ -23,8 +23,11 @@
#### 白条借款人明细表,按loan id列示
* 未还本金_2016---非去哪儿最新.xlsx  
* 未还本金_2015---非去哪儿最新.xlsx  (代码为2015年的参数,2016年数据需要修改下参数)
- 白条未还本金-linfang.py
* 未还本金_2015---非去哪儿最新.xlsx  
- 白条未还本金-linfang.py (代码为2015年的参数,2016年数据需要修改下参数)
+ 2015年OK
+ 未还本金_2016_new.xlsx   结果差异 compare_diff.xlsx
* 2015_去哪儿未还本金.xlsx
* 2016_去哪儿未还本金.xlsx
- 0628在贷明细_upload.ipynb
......
......@@ -7,10 +7,10 @@ import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from dateutil.relativedelta import relativedelta
reload(sys)
sys.setdefaultencoding('utf8')
'''
目标:根据时间点,以 订单为维度,计算未还本金 即 放款本金 - 已还本金
@author linfang
......@@ -19,88 +19,115 @@ sys.setdefaultencoding('utf8')
注意:订单统一为成功订单(is_active = 1)
'''
# NOTE(review): this span shows two renderings of the configuration back to back
# (compact and spaced formatting), so most names are assigned twice. Read
# literally, the later assignment of each name is the one that takes effect —
# confirm which values are intended before running.
#----------------configuration variables: begin--------------------------
# ----------------configuration variables: begin--------------------------
# TODO output directory
path = '/Users/xujiazhe/Desktop/data/'
# Output file name
file_name = u'未还本金_2016'
# TODO repayment time window: start --- end
start_time = '2016-01-01 00:00:00'
end_time = '2017-01-01 00:00:00'
#TODO output directory
path='E:/'
#Output file name
file_name=u'未还本金_2015'
#TODO repayment time window: start --- end
start_time = '2015-01-01 00:00:00'
end_time = '2016-01-01 00:00:00'
#TODO disbursement time window: start --- end
fk_start_time='2015-01-01 00:00:00'
fk_end_time='2016-01-01 00:00:00'
# TODO disbursement time window: start --- end
fk_start_time = '2016-01-01 00:00:00'
fk_end_time = '2017-01-01 00:00:00'
#TODO data source
# TODO data source
# NOTE(review): both connection URLs below embed a user name and password in
# the source text; consider loading them from outside the repository.
# NOTE(review): as written, two URL strings are passed positionally to
# create_engine — presumably only one of them is meant to remain; confirm.
engine_new_transaction = create_engine(
'mysql+mysqldb://yulong_rw:TouBStYwN8wkdxVt@172.16.3.201:3306/new_transaction?charset=utf8',
'mysql+mysqldb://internal_r:ArbNgtvlJzZHXsEu@172.16.3.201:3306/new_transaction?charset=utf8',
echo=True)
#----------------configuration variables: end--------------------------
# ----------------configuration variables: end--------------------------
read_merchant_sql='''SELECT merchant_id,merchant_name FROM baitiao_audit.merchant'''
df_merchant=pd.read_sql(read_merchant_sql ,con=engine_new_transaction)
df_merchant['merchant_id']=df_merchant['merchant_id'].astype(int)
read_repay_sql='''
SELECT id,`approach_name` FROM new_transaction.`repay_channel`
read_merchant_sql = '''SELECT merchant_id,merchant_name FROM baitiao_audit.merchant'''
df_merchant = pd.read_sql(read_merchant_sql, con=engine_new_transaction)
df_merchant['merchant_id'] = df_merchant['merchant_id'].astype(int)
read_repay_sql = '''
SELECT id,`approach_name` FROM new_transaction.`repay_channel`
'''
df_approach=pd.read_sql(read_repay_sql,engine_new_transaction)
df_approach['id']=df_approach['id'].astype(int)
df_approach = pd.read_sql(read_repay_sql, engine_new_transaction)
df_approach['id'] = df_approach['id'].astype(int)
read_fund_sql='''
SELECT id,fund_name FROM baitiao_audit.`fund_corp`
read_fund_sql = '''
SELECT id,fund_name FROM baitiao_audit.`fund_corp`
'''
df_fund = pd.read_sql(read_fund_sql,engine_new_transaction)
df_fund['id']=df_fund['id'].astype(int)
# Compact rendering of fk (table alias t4); a reformatted definition with the
# same name appears further down in this listing.
# Reads rows of baitiao_audit.baitiao_order with real_loan_amount > 0 and
# is_active = 1 whose loan_paid_at lies in [start_time, end_time).
# Both bounds are substituted into the SQL text with % formatting, so they must
# come from trusted configuration.
def fk(start_time,end_time):
sql_1 = '''
SELECT t4.ref_id,t4.funding_corp_id,t4.`merchantId`,t4.`order_no`,t4.`real_loan_amount`,t4.`contract_term`,t4.`loan_paid_at`
FROM baitiao_audit.`baitiao_order` t4
WHERE t4.real_loan_amount > 0 AND t4.`is_active` = 1 AND t4.loan_paid_at >= '%s' and t4.loan_paid_at < '%s'
''' % (start_time,end_time)
# The query runs on the module-level engine; the result is returned as a DataFrame.
return pd.read_sql(sql_1,engine_new_transaction)
# Compact rendering of hk (table aliases t1..t4); a reformatted definition with
# the same name appears further down in this listing.
# Sums principle - mitigate_principle per ref_id for three groups of repay
# channels, each joined to its own record table, restricted to active orders
# and to the half-open window [start_time, end_time).
# The three result sets are stacked, not merged, so a ref_id can occur once per
# channel group in the returned frame.
def hk(start_time,end_time):
# Channel 13: joined to offline_alipay_record, filtered on transfer_time.
sql_2_1='''
SELECT t2.ref_id,SUM(t1.principle-t1.mitigate_principle) real_principle
FROM `baitiao_repay_plan_repay_record_ref` t1
JOIN `offline_alipay_record` t3 ON t1.`record_id` = t3.id AND t1.`repay_channel` = 13
JOIN `user_bt_repayment_plan` t2 ON t1.plan_id = t2.id
JOIN baitiao_audit.`baitiao_order` t4 ON t2.ref_id = t4.ref_id AND t4.`is_active` = 1
WHERE t3.transfer_time >= '%s' and t3.transfer_time < '%s'
GROUP BY t2.ref_id
''' % (start_time,end_time)
# Channels below 13: joined to repay_record_online, filtered on bill_time.
sql_2_2 = '''
SELECT t2.ref_id,SUM(t1.principle-t1.mitigate_principle) real_principle
FROM `baitiao_repay_plan_repay_record_ref` t1
JOIN `repay_record_online` t3 ON t1.`record_id` = t3.id AND t1.`repay_channel` < 13
JOIN `user_bt_repayment_plan` t2 ON t1.plan_id = t2.id
JOIN baitiao_audit.`baitiao_order` t4 ON t2.ref_id = t4.ref_id AND t4.`is_active` = 1
WHERE t3.bill_time >= '%s' and t3.bill_time < '%s'
GROUP BY t2.ref_id
''' % (start_time,end_time)
# Channels 14, 15, 16: joined to offline_bank_repay_record, filtered on transfer_time.
sql_2_3='''
SELECT t2.ref_id,SUM(t1.principle-t1.mitigate_principle) real_principle
FROM `baitiao_repay_plan_repay_record_ref` t1
JOIN `offline_bank_repay_record` t3 ON t1.`record_id` = t3.id AND t1.`repay_channel` in (14,15,16)
JOIN `user_bt_repayment_plan` t2 ON t1.plan_id = t2.id
JOIN baitiao_audit.`baitiao_order` t4 ON t2.ref_id = t4.ref_id AND t4.`is_active` = 1
WHERE t3.transfer_time >= '%s' and t3.transfer_time < '%s'
GROUP BY t2.ref_id
''' % (start_time,end_time)
# Run the three queries in order on the module-level engine.
df_2_1=pd.read_sql(sql_2_1 ,engine_new_transaction)
df_2_2=pd.read_sql(sql_2_2 ,engine_new_transaction)
df_2_3 = pd.read_sql(sql_2_3 , engine_new_transaction)
# Stack the per-channel results row-wise with a fresh 0..n-1 index.
return pd.concat([df_2_1,df_2_2,df_2_3],axis=0,ignore_index=True)
def tj(df_fk,df_hk,filename):
df_fund = pd.read_sql(read_fund_sql, engine_new_transaction)
df_fund['id'] = df_fund['id'].astype(int)
def fk(start_time, end_time):
    """Load the orders whose loan_paid_at lies in [start_time, end_time).

    Only rows of baitiao_audit.baitiao_order with real_loan_amount > 0 and
    is_active = 1 are selected. Both bounds are substituted into the SQL
    text with % formatting, so they must come from trusted configuration.

    :param start_time: inclusive lower bound compared against loan_paid_at
    :param end_time: exclusive upper bound compared against loan_paid_at
    :return: the DataFrame produced by pd.read_sql on the module-level engine
    """
    # The statement text is kept flush-left so the SQL sent to the server
    # (and echoed by the engine) stays exactly as before.
    query_template = '''
SELECT
bt_o.ref_id,
bt_o.funding_corp_id,
bt_o.`merchantId`,
bt_o.`order_no`,
bt_o.`real_loan_amount`,
bt_o.`contract_term`,
bt_o.`loan_paid_at`
FROM baitiao_audit.`baitiao_order` bt_o
WHERE bt_o.real_loan_amount > 0 AND bt_o.`is_active` = 1 AND bt_o.loan_paid_at >= '%s' and
bt_o.loan_paid_at < '%s'
'''
    loan_query = query_template % (start_time, end_time)
    return pd.read_sql(loan_query, con=engine_new_transaction)
def hk(start_time, end_time):
    """Collect the principal repaid per ref_id within [start_time, end_time).

    Three statements are issued, one per group of repay channels, each
    joined to its own record table and restricted to active orders:

    * channel 13           -> offline_alipay_record, filtered on transfer_time
    * channels below 13    -> repay_record_online, filtered on bill_time
    * channels 14, 15, 16  -> offline_bank_repay_record, filtered on transfer_time

    Each statement sums principle - mitigate_principle grouped by ref_id.
    The results are stacked rather than merged, so a ref_id may occur once
    per channel group in the returned frame.

    :param start_time: inclusive lower bound of the time window
    :param end_time: exclusive upper bound of the time window
    :return: DataFrame with columns ref_id and real_principle
    """
    window = (start_time, end_time)

    # Statement texts are kept flush-left so the SQL sent to the server (and
    # echoed by the engine) stays exactly as before.
    alipay_offline_sql = '''
SELECT
ubrp.ref_id,
SUM(bt_rp_rrr.principle - bt_rp_rrr.mitigate_principle) real_principle
FROM `baitiao_repay_plan_repay_record_ref` bt_rp_rrr
JOIN `offline_alipay_record` oar
ON bt_rp_rrr.`record_id` = oar.id AND bt_rp_rrr.`repay_channel` = 13
JOIN `user_bt_repayment_plan` ubrp
ON bt_rp_rrr.plan_id = ubrp.id
JOIN baitiao_audit.`baitiao_order` bt_o
ON ubrp.ref_id = bt_o.ref_id AND bt_o.`is_active` = 1
WHERE oar.transfer_time >= '%s' and oar.transfer_time < '%s'
GROUP BY ubrp.ref_id
''' % window

    online_sql = '''
SELECT
ubrp.ref_id,
SUM(bt_rp_rrr.principle - bt_rp_rrr.mitigate_principle) real_principle
FROM `baitiao_repay_plan_repay_record_ref` bt_rp_rrr
JOIN `repay_record_online` rro
ON bt_rp_rrr.`record_id` = rro.id AND bt_rp_rrr.`repay_channel` < 13
JOIN `user_bt_repayment_plan` ubrp
ON bt_rp_rrr.plan_id = ubrp.id
JOIN baitiao_audit.`baitiao_order` bt_o
ON ubrp.ref_id = bt_o.ref_id AND bt_o.`is_active` = 1
WHERE rro.bill_time >= '%s' and rro.bill_time < '%s'
GROUP BY ubrp.ref_id
''' % window

    bank_offline_sql = '''
SELECT
ubrp.ref_id,
SUM(bt_rp_rrr.principle - bt_rp_rrr.mitigate_principle) real_principle
FROM `baitiao_repay_plan_repay_record_ref` bt_rp_rrr
JOIN `offline_bank_repay_record` obrr
ON bt_rp_rrr.`record_id` = obrr.id AND bt_rp_rrr.`repay_channel` in (14, 15, 16)
JOIN `user_bt_repayment_plan` ubrp
ON bt_rp_rrr.plan_id = ubrp.id
JOIN baitiao_audit.`baitiao_order` bt_o
ON ubrp.ref_id = bt_o.ref_id AND bt_o.`is_active` = 1
WHERE obrr.transfer_time >= '%s' and obrr.transfer_time < '%s'
GROUP BY ubrp.ref_id
''' % window

    # Execute in the same order as before: Alipay offline, online, bank offline.
    channel_frames = [
        pd.read_sql(statement, engine_new_transaction)
        for statement in (alipay_offline_sql, online_sql, bank_offline_sql)
    ]
    return pd.concat(channel_frames, axis=0, ignore_index=True)
def tj(df_fk, df_hk, filename):
df_2 = df_hk.groupby(by='ref_id')['real_principle'].agg({'sum'}).reset_index().rename(
columns={'sum': 'principle_sum'})
df_2['ref_id'] = df_2['ref_id'].astype(int)
......@@ -117,15 +144,15 @@ def tj(df_fk,df_hk,filename):
'contract_term': '合同期数',
'loan_paid_at': '放款时间', 'diff': '未还本金'}, inplace=True)
df = df[['资金方', '商户', '订单号', '订单金额', '合同期数', '放款时间', '未还本金']]
df.to_excel(path + filename+'.xlsx', columns=df.columns, index=None, encoding='utf8')
df.to_excel(path + filename + '.xlsx', columns=df.columns, index=None, encoding='utf8')
if __name__ == '__main__':
    # Run the two extraction queries in separate worker processes.
    # Assumes mtp is the multiprocessing module (its import is outside the
    # visible part of the file) — TODO confirm.
    pool = mtp.Pool(processes=2)
    # Each job is submitted exactly once; the listing previously carried two
    # renderings of every call, which read literally would run them twice.
    df_fk = pool.apply_async(fk, (fk_start_time, fk_end_time))
    df_hk = pool.apply_async(hk, (start_time, end_time))
    pool.close()
    pool.join()
    # Use the public get() instead of the private _value attribute: get()
    # re-raises an exception that occurred in the worker, whereas _value would
    # hand the exception object itself to tj.
    tj(df_fk.get(), df_hk.get(), file_name)
    # Single-argument call form prints the same text under Python 2 and 3.
    print('======================main done===================================')
......@@ -8,9 +8,10 @@
>> 1. 整理审计数据,将之前的需求都找到,然后拿到代码
>> 2. 跑一遍,检查哪个数据库可以匹配,并且能跑出正确结果,记录下来
>> 3. 目标是 当时 跑出来的结果, 现在找 对应的库, 把能跑出和原结果一样的库 标记为 输入库.
具体工作 | 财务对接人 | | 负责人
具体工作 | 财务对接人 | | 负责人
------- | ------- | ----- | ----------
内控库业务数据提取 | 彭千 | 数据部 | 玉龙
现金贷2015和2016年的放款表更新 | 彭千 | 数据部 | 玉龙
......@@ -29,7 +30,7 @@
现金贷借款人明细期后收款情况,按loan id列示 | 彭千 | 数据部 | 马振
白条借款人明细期后收款情况,按loan id列示 | 彭千 | 数据部 | 德宇、林芳
现场查看ABC放款表明细SQL(带loan id 和ABC ) | 彭千 | 数据部 | 马振
截止到2016.12.31的代偿统计表以及代偿后还款统计表 | 彭千 | 数据部 |
截止到2016.12.31的代偿统计表以及代偿后还款统计表 | 彭千 | 数据部 |
1~3月的放款,还款数据 | | | 王博,林芳,时耀
<hr>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment