Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
duizhang_tools
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data—王林芳
duizhang_tools
Commits
606f16a1
Commit
606f16a1
authored
Jul 28, 2017
by
白条—徐加哲
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
格式化 跑2015, 2016年 白条未还本金, 比较差异.
parent
d3c14359
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
179 additions
and
80 deletions
+179
-80
compare_diff.xlsx
audit_sort/linfang/compare_diff.xlsx
+0
-0
excel_compare_tool.py
audit_sort/linfang/excel_compare_tool.py
+68
-0
readme.md
audit_sort/linfang/readme.md
+5
-2
未还本金_2016_new.xlsx
audit_sort/linfang/未还本金_2016_new.xlsx
+0
-0
白条未还本金-linfang.py
audit_sort/linfang/白条未还本金-linfang.py
+103
-76
readme.md
audit_sort/readme.md
+3
-2
No files found.
audit_sort/linfang/compare_diff.xlsx
0 → 100644
View file @
606f16a1
File added
audit_sort/linfang/excel_compare_tool.py
0 → 100644
View file @
606f16a1
# -*- coding: utf-8 -*-
#
# Created by xujiazhe on 2017/7/28.
#
import
sys
import
pandas
as
pd
# Python 2 only: force the process-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Compare the results held in two large Excel workbooks.
#   * The columns of both workbooks must already be aligned.
#   * Rows are matched on the key column and compared value by value.
#   * Ideally the generating program would run this comparison directly after
#     producing its result, because re-reading a large workbook is slow.

old_path = '/opt/duizhang_tools/audit_sort/linfang/未还本金_2016---非去哪儿最新.xlsx'
new_path = '/Users/xujiazhe/Desktop/data/未还本金_2016.xlsx'

# Key column used to match rows between the two workbooks.
key_columns = u'订单号'
# Value columns that must agree for a matched row to count as identical.
compared_columns = [u'资金方', u'商户', u'订单金额', u'合同期数', u'放款时间', u'未还本金']

old_df = pd.read_excel(old_path)
new_df = pd.read_excel(new_path)

# Drop rows of the old workbook that have no key value.
# (.loc replaces .ix, which was deprecated in pandas 0.20 and removed in 1.0.)
old_df = old_df.loc[~old_df[key_columns].isnull()]

# Keys that appear in exactly one of the two workbooks.
diff_order_nos = set(old_df[key_columns].unique()) ^ set(new_df[key_columns].unique())

old_df = old_df.sort_values(by=[key_columns])
new_df = new_df.sort_values(by=[key_columns])

# Diagnostics only: number of rows per key in each workbook.  A key occurring
# more than once multiplies rows in the merge below.
gp1 = old_df.groupby(key_columns)[key_columns].agg(['count']).reset_index()
gp2 = new_df.groupby(key_columns)[key_columns].agg(['count']).reset_index()
max_rows_per_order = gp2['count'].max()
# The original evaluated this comparison and discarded the result; keep it
# under a name so that it can actually be inspected.
new_orders_unique = len(gp2) == len(new_df)

# Rows whose key exists on one side only.
old_add = old_df.loc[old_df[key_columns].isin(diff_order_nos)]
new_add = new_df.loc[new_df[key_columns].isin(diff_order_nos)]

# Restrict both sides to the keys they share, then pair the rows up.
old_df = old_df.loc[~old_df[key_columns].isin(diff_order_nos)]
new_df = new_df.loc[~new_df[key_columns].isin(diff_order_nos)]
df = old_df.merge(new_df, on=key_columns)

# A paired row is "same" when every compared column agrees.  Two missing
# values count as agreeing: plain == yields False for NaN == NaN, which would
# report rows that are blank in both workbooks as differences.
con_same = pd.Series(True, index=df.index, dtype=bool)
for column in compared_columns:
    old_values = df[column + u'_x']
    new_values = df[column + u'_y']
    both_missing = old_values.isnull() & new_values.isnull()
    con_same &= (old_values == new_values) | both_missing

cmp_diff_df = df[~con_same]
old_diff = old_df.loc[old_df[key_columns].isin(cmp_diff_df[key_columns])]
new_diff = new_df.loc[new_df[key_columns].isin(cmp_diff_df[key_columns])]

# Output order: differing rows (old, then new), then one-sided rows (old, then new).
diff_res_df = pd.concat([old_diff, new_diff, old_add, new_add])
diff_res_path = '/Users/xujiazhe/Desktop/data/compare.xlsx'
diff_res_df.to_excel(diff_res_path, index=None)
audit_sort/linfang/readme.md
View file @
606f16a1
...
...
@@ -23,8 +23,11 @@
#### 白条借款人明细表,按loan id列示
*
未还本金_2016---非去哪儿最新.xlsx
*
未还本金_2015---非去哪儿最新.xlsx (代码为2015年的参数,2016年数据需要修改下参数)
-
白条未还本金-linfang.py
*
未还本金_2015---非去哪儿最新.xlsx
-
白条未还本金-linfang.py (代码为2015年的参数,2016年数据需要修改下参数)
+
2015年OK
+
未还本金_2016_new.xlsx 结果差异 compare_diff.xlsx
*
2015_去哪儿未还本金.xlsx
*
2016_去哪儿未还本金.xlsx
-
0628在贷明细_upload.ipynb
...
...
audit_sort/linfang/未还本金_2016_new.xlsx
0 → 100644
View file @
606f16a1
File added
audit_sort/linfang/白条未还本金-linfang.py
View file @
606f16a1
...
...
@@ -7,10 +7,10 @@ import numpy as np
import
pandas
as
pd
from
sqlalchemy
import
create_engine
from
dateutil.relativedelta
import
relativedelta
reload
(
sys
)
sys
.
setdefaultencoding
(
'utf8'
)
'''
目标:根据时间点,以 订单为维度,计算未还本金 即 放款本金 - 已还本金
@author linfang
...
...
@@ -19,88 +19,115 @@ sys.setdefaultencoding('utf8')
注意:订单统一为成功订单(is_active = 1)
'''
#----------------配置变量项开始--------------------------
# ----------------配置变量项开始--------------------------
# TODO 文件输出路径
path
=
'/Users/xujiazhe/Desktop/data/'
# 输出文件名
file_name
=
u'未还本金_2016'
# TODO 还款时间开始 --- 还款时间结束
start_time
=
'2016-01-01 00:00:00'
end_time
=
'2017-01-01 00:00:00'
#TODO 文件输出路径
path
=
'E:/'
#输出文件名
file_name
=
u'未还本金_2015'
#TODO 还款时间开始 --- 还款时间结束
start_time
=
'2015-01-01 00:00:00'
end_time
=
'2016-01-01 00:00:00'
#TODO 放款时间开始 --- 放款时间结束
fk_start_time
=
'2015-01-01 00:00:00'
fk_end_time
=
'2016-01-01 00:00:00'
# TODO 放款时间开始 --- 放款时间结束
fk_start_time
=
'2016-01-01 00:00:00'
fk_end_time
=
'2017-01-01 00:00:00'
#TODO 数据源
#
TODO 数据源
engine_new_transaction
=
create_engine
(
'mysql+mysqldb://
yulong_rw:TouBStYwN8wkdxVt
@172.16.3.201:3306/new_transaction?charset=utf8'
,
'mysql+mysqldb://
internal_r:ArbNgtvlJzZHXsEu
@172.16.3.201:3306/new_transaction?charset=utf8'
,
echo
=
True
)
#----------------配置变量项结束--------------------------
#
----------------配置变量项结束--------------------------
read_merchant_sql
=
'''SELECT merchant_id,merchant_name FROM baitiao_audit.merchant'''
df_merchant
=
pd
.
read_sql
(
read_merchant_sql
,
con
=
engine_new_transaction
)
df_merchant
[
'merchant_id'
]
=
df_merchant
[
'merchant_id'
]
.
astype
(
int
)
read_repay_sql
=
'''
SELECT id,`approach_name` FROM new_transaction.`repay_channel`
read_merchant_sql
=
'''SELECT merchant_id,merchant_name FROM baitiao_audit.merchant'''
df_merchant
=
pd
.
read_sql
(
read_merchant_sql
,
con
=
engine_new_transaction
)
df_merchant
[
'merchant_id'
]
=
df_merchant
[
'merchant_id'
]
.
astype
(
int
)
read_repay_sql
=
'''
SELECT id,`approach_name` FROM new_transaction.`repay_channel`
'''
df_approach
=
pd
.
read_sql
(
read_repay_sql
,
engine_new_transaction
)
df_approach
[
'id'
]
=
df_approach
[
'id'
]
.
astype
(
int
)
df_approach
=
pd
.
read_sql
(
read_repay_sql
,
engine_new_transaction
)
df_approach
[
'id'
]
=
df_approach
[
'id'
]
.
astype
(
int
)
read_fund_sql
=
'''
SELECT id,fund_name FROM baitiao_audit.`fund_corp`
read_fund_sql
=
'''
SELECT id,fund_name FROM baitiao_audit.`fund_corp`
'''
df_fund
=
pd
.
read_sql
(
read_fund_sql
,
engine_new_transaction
)
df_fund
[
'id'
]
=
df_fund
[
'id'
]
.
astype
(
int
)
def fk(start_time, end_time):
    """Load the active orders whose loan was paid out in [start_time, end_time).

    Only orders with ``real_loan_amount > 0`` and ``is_active = 1`` are
    selected.  Both bounds are interpolated into the SQL text as quoted
    literals.

    :param start_time: inclusive lower bound for ``loan_paid_at``.
    :param end_time: exclusive upper bound for ``loan_paid_at``.
    :return: DataFrame with one row per selected order.
    """
    paid_window = (start_time, end_time)
    loan_query = '''
    SELECT t4.ref_id,t4.funding_corp_id,t4.`merchantId`,t4.`order_no`,t4.`real_loan_amount`,t4.`contract_term`,t4.`loan_paid_at`
    FROM baitiao_audit.`baitiao_order` t4
    WHERE t4.real_loan_amount > 0 AND t4.`is_active` = 1 AND t4.loan_paid_at >= '%s' and t4.loan_paid_at < '%s'
    ''' % paid_window
    loans = pd.read_sql(loan_query, engine_new_transaction)
    return loans
def hk(start_time, end_time):
    """Sum the principal repaid per order within [start_time, end_time).

    The repaid principal (``principle - mitigate_principle``) is summed per
    ``ref_id`` separately for three repayment record tables, each tied to its
    own ``repay_channel`` values and its own timestamp column.  Only active
    orders (``is_active = 1``) are counted.

    :param start_time: inclusive lower bound for the repayment timestamp.
    :param end_time: exclusive upper bound for the repayment timestamp.
    :return: the three per-source results stacked into one DataFrame with a
        fresh index; a ``ref_id`` repaid through several sources therefore
        appears once per source and still has to be aggregated by the caller.
    """
    # The three queries differ only in the record table, the channel predicate
    # and the timestamp column, so they share a single template.
    query_template = '''
    SELECT t2.ref_id,SUM(t1.principle-t1.mitigate_principle) real_principle
    FROM `baitiao_repay_plan_repay_record_ref` t1
    JOIN `%(table)s` t3 ON t1.`record_id` = t3.id AND t1.`repay_channel` %(channels)s
    JOIN `user_bt_repayment_plan` t2 ON t1.plan_id = t2.id
    JOIN baitiao_audit.`baitiao_order` t4 ON t2.ref_id = t4.ref_id AND t4.`is_active` = 1
    WHERE t3.%(time_column)s >= '%(start)s' and t3.%(time_column)s < '%(end)s'
    GROUP BY t2.ref_id
    '''
    # (record table, repay_channel predicate, timestamp column), in the order
    # in which the queries were originally issued.
    repayment_sources = (
        ('offline_alipay_record', '= 13', 'transfer_time'),
        ('repay_record_online', '< 13', 'bill_time'),
        ('offline_bank_repay_record', 'in (14,15,16)', 'transfer_time'),
    )
    queries = [
        query_template % {
            'table': table,
            'channels': channels,
            'time_column': time_column,
            'start': start_time,
            'end': end_time,
        }
        for table, channels, time_column in repayment_sources
    ]
    frames = [pd.read_sql(query, engine_new_transaction) for query in queries]
    return pd.concat(frames, axis=0, ignore_index=True)
def
tj
(
df_fk
,
df_hk
,
filename
):
df_fund
=
pd
.
read_sql
(
read_fund_sql
,
engine_new_transaction
)
df_fund
[
'id'
]
=
df_fund
[
'id'
]
.
astype
(
int
)
def fk(start_time, end_time):
    """Load the active orders whose loan was paid out in [start_time, end_time).

    Only orders with ``real_loan_amount > 0`` and ``is_active = 1`` are
    selected.  Both bounds are interpolated into the SQL text as quoted
    literals.

    :param start_time: inclusive lower bound for ``loan_paid_at``.
    :param end_time: exclusive upper bound for ``loan_paid_at``.
    :return: DataFrame with one row per selected order.
    """
    paid_window = (start_time, end_time)
    loan_query = '''
    SELECT
      bt_o.ref_id,
      bt_o.funding_corp_id,
      bt_o.`merchantId`,
      bt_o.`order_no`,
      bt_o.`real_loan_amount`,
      bt_o.`contract_term`,
      bt_o.`loan_paid_at`
    FROM baitiao_audit.`baitiao_order` bt_o
    WHERE bt_o.real_loan_amount > 0 AND bt_o.`is_active` = 1 AND bt_o.loan_paid_at >= '%s' and
          bt_o.loan_paid_at < '%s'
    ''' % paid_window
    loans = pd.read_sql(loan_query, engine_new_transaction)
    return loans
def hk(start_time, end_time):
    """Sum the principal repaid per order within [start_time, end_time).

    The repaid principal (``principle - mitigate_principle``) is summed per
    ``ref_id`` separately for three repayment record tables, each tied to its
    own ``repay_channel`` values and its own timestamp column.  Only active
    orders (``is_active = 1``) are counted.

    :param start_time: inclusive lower bound for the repayment timestamp.
    :param end_time: exclusive upper bound for the repayment timestamp.
    :return: the three per-source results stacked into one DataFrame with a
        fresh index; a ``ref_id`` repaid through several sources therefore
        appears once per source and still has to be aggregated by the caller.
    """
    # The three queries differ only in the record table (and its alias), the
    # channel predicate and the timestamp column, so they share one template.
    query_template = '''
    SELECT
      ubrp.ref_id,
      SUM(bt_rp_rrr.principle - bt_rp_rrr.mitigate_principle) real_principle
    FROM `baitiao_repay_plan_repay_record_ref` bt_rp_rrr
      JOIN `%(table)s` %(alias)s
        ON bt_rp_rrr.`record_id` = %(alias)s.id AND bt_rp_rrr.`repay_channel` %(channels)s
      JOIN `user_bt_repayment_plan` ubrp
        ON bt_rp_rrr.plan_id = ubrp.id
      JOIN baitiao_audit.`baitiao_order` bt_o
        ON ubrp.ref_id = bt_o.ref_id AND bt_o.`is_active` = 1
    WHERE %(alias)s.%(time_column)s >= '%(start)s' and %(alias)s.%(time_column)s < '%(end)s'
    GROUP BY ubrp.ref_id
    '''
    # (record table, alias, repay_channel predicate, timestamp column), in the
    # order in which the queries were originally issued.
    repayment_sources = (
        ('offline_alipay_record', 'oar', '= 13', 'transfer_time'),
        ('repay_record_online', 'rro', '< 13', 'bill_time'),
        ('offline_bank_repay_record', 'obrr', 'in (14, 15, 16)', 'transfer_time'),
    )
    queries = [
        query_template % {
            'table': table,
            'alias': alias,
            'channels': channels,
            'time_column': time_column,
            'start': start_time,
            'end': end_time,
        }
        for table, alias, channels, time_column in repayment_sources
    ]
    frames = [pd.read_sql(query, engine_new_transaction) for query in queries]
    return pd.concat(frames, axis=0, ignore_index=True)
def
tj
(
df_fk
,
df_hk
,
filename
):
df_2
=
df_hk
.
groupby
(
by
=
'ref_id'
)[
'real_principle'
]
.
agg
({
'sum'
})
.
reset_index
()
.
rename
(
columns
=
{
'sum'
:
'principle_sum'
})
df_2
[
'ref_id'
]
=
df_2
[
'ref_id'
]
.
astype
(
int
)
...
...
@@ -117,15 +144,15 @@ def tj(df_fk,df_hk,filename):
'contract_term'
:
'合同期数'
,
'loan_paid_at'
:
'放款时间'
,
'diff'
:
'未还本金'
},
inplace
=
True
)
df
=
df
[[
'资金方'
,
'商户'
,
'订单号'
,
'订单金额'
,
'合同期数'
,
'放款时间'
,
'未还本金'
]]
df
.
to_excel
(
path
+
filename
+
'.xlsx'
,
columns
=
df
.
columns
,
index
=
None
,
encoding
=
'utf8'
)
df
.
to_excel
(
path
+
filename
+
'.xlsx'
,
columns
=
df
.
columns
,
index
=
None
,
encoding
=
'utf8'
)
if __name__ == '__main__':
    # Run the loan query (fk) and the repayment queries (hk) concurrently in a
    # two-worker pool.  NOTE(review): `mtp` is imported outside this view and
    # is presumably `multiprocessing` - confirm.
    pool = mtp.Pool(processes=2)
    # Each job is submitted exactly once; the scraped diff showed the old and
    # the new spelling of these lines together, which would run every query
    # (and the export below) twice.
    df_fk = pool.apply_async(fk, (fk_start_time, fk_end_time))
    df_hk = pool.apply_async(hk, (start_time, end_time))
    pool.close()
    pool.join()
    # .get() is the public accessor and re-raises any exception raised in the
    # worker; the private `_value` attribute would instead hand the exception
    # object to tj() as if it were a DataFrame.
    tj(df_fk.get(), df_hk.get(), file_name)
    print('======================main done===================================')
audit_sort/readme.md
View file @
606f16a1
...
...
@@ -8,9 +8,10 @@
>> 1. 整理审计数据,将之前的需求都找到,然后拿到代码
>> 2. 跑一遍,检查哪个数据库可以匹配,并且能跑出正确结果,记录下来
>> 3. 目标是 当时 跑出来的结果, 现在找 对应的库, 把能跑出和原结果一样的库 标记为 输入库.
具体工作 | 财务对接人 | | 负责人
具体工作 | 财务对接人 | | 负责人
------- | ------- | ----- | ----------
内控库业务数据提取 | 彭千 | 数据部 | 玉龙
现金贷2015和2016年的放款表更新 | 彭千 | 数据部 | 玉龙
...
...
@@ -29,7 +30,7 @@
现金贷借款人明细期后收款情况,按loan id列示 | 彭千 | 数据部 | 马振
白条借款人明细期后收款情况,按loan id列示 | 彭千 | 数据部 | 德宇、林芳
现场查看ABC放款表明细SQL(带loan id 和ABC ) | 彭千 | 数据部 | 马振
截止到2016.12.31的代偿统计表以及代偿后还款统计表 | 彭千 | 数据部 |
截止到2016.12.31的代偿统计表以及代偿后还款统计表 | 彭千 | 数据部 |
1~3月的放款,还款数据 | | | 王博,林芳,时耀
<hr>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment