Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
duizhang_tools
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
data—王林芳
duizhang_tools
Commits
791b0400
Commit
791b0400
authored
Oct 26, 2017
by
zhen.ma
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rt
parent
fe8f9a3f
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
62 additions
and
36 deletions
+62
-36
收入确认表-不同库版本.py
审计第一阶段/现金贷/收入确认表-不同库版本.py
+16
-6
收入确认表-读数据.py
审计第一阶段/现金贷/收入确认表-读数据.py
+17
-17
去哪儿收入确认表-模型.py
审计第一阶段/白条/去哪儿/去哪儿收入确认表-模型.py
+4
-4
去哪儿收入确认表.py
审计第一阶段/白条/去哪儿/去哪儿收入确认表.py
+9
-2
非去哪儿收入确认表-模型.py
审计第一阶段/白条/非去哪儿/非去哪儿收入确认表-模型.py
+4
-4
非去哪儿收入确认表.py
审计第一阶段/白条/非去哪儿/非去哪儿收入确认表.py
+12
-3
No files found.
审计第一阶段/现金贷/收入确认表-不同库版本.py
View file @
791b0400
...
...
@@ -25,6 +25,8 @@ pd.options.mode.chained_assignment = None
pd
.
set_option
(
'display.float_format'
,
lambda
x
:
'
%.5
f'
%
x
)
res_sum
=
[]
def
proc_save
(
watch_date
):
loan
=
df_loan_ratio
.
loc
[
df_loan_ratio
[
u'放款日期'
]
<
watch_date
]
loan
.
loc
[
~
loan
.
is_active
.
isin
([
1
,
-
2
]),
u'本金债权'
]
=
0
...
...
@@ -84,12 +86,16 @@ def proc_save(watch_date):
0
)
print
(
loan_result
[
[
u'本金债权'
,
u'计提风险金'
,
u'qg_one_time_service_fee'
,
u'实还服务费(减免后)'
,
u'趸交服务费'
,
u'实还服务费(减免后、含一次性服务费、趸交服务费)'
,
u'asset余额'
,
[
u'本金债权'
,
u'计提风险金'
,
u'qg_one_time_service_fee'
,
u'实还服务费(减免后)'
,
u'趸交服务费'
,
u'实还服务费(减免后、含一次性服务费、趸交服务费)'
,
u'asset余额'
,
u'收入确认金额'
]]
.
sum
())
tmp_sum
=
loan_result
[
[
u'本金债权'
,
u'计提风险金'
,
u'qg_one_time_service_fee'
,
u'实还服务费(减免后)'
,
u'趸交服务费'
,
u'实还服务费(减免后、含一次性服务费、趸交服务费)'
,
u'asset余额'
,
u'收入确认金额'
]]
.
sum
()
.
to_frame
()
.
T
# tmp_sum = loan_result.groupby(u'风险等级')[
# [u'本金债权', u'计提风险金', u'qg_one_time_service_fee', u'实还服务费(减免后)', u'趸交服务费', u'实还服务费(减免后、含一次性服务费、趸交服务费)',
# u'asset余额', u'收入确认金额']].agg('sum').reset_index()
tmp_sum
[
u'截止时间'
]
=
watch_date
res_sum
.
append
(
tmp_sum
)
...
...
@@ -107,7 +113,7 @@ def proc_save(watch_date):
if
__name__
==
'__main__'
:
step
=
10000
max_limit
=
900000
file_path
=
u'E:/审计出表/现金贷/
0926
'
file_path
=
u'E:/审计出表/现金贷/
1021
'
file_name
=
u'现金贷收入确认表'
if
not
os
.
path
.
exists
(
file_path
):
os
.
makedirs
(
file_path
)
...
...
@@ -129,9 +135,12 @@ if __name__ == '__main__':
df_ms
.
loan_id
=
df_ms
.
loan_id
.
astype
(
str
)
db_names
=
[
'all_back_0630'
,
'all_back_0731'
,
'all_back_0831'
,
'new_transaction'
]
date_report_list
=
[[
item
.
date
()
for
item
in
pd
.
date_range
(
start
=
'2017-02'
,
end
=
'2017-07'
,
freq
=
'MS'
)],
[
datetime
.
date
(
2017
,
8
,
1
)],
[
datetime
.
date
(
2017
,
9
,
1
)],
[
datetime
.
date
(
2017
,
10
,
1
)]]
date_report_list
=
[
[
datetime
.
date
(
2017
,
2
,
1
),
datetime
.
date
(
2017
,
3
,
1
),
datetime
.
date
(
2017
,
4
,
1
),
datetime
.
date
(
2017
,
5
,
1
),
datetime
.
date
(
2017
,
6
,
1
),
datetime
.
date
(
2017
,
7
,
1
)],
[
datetime
.
date
(
2017
,
8
,
1
)],
[
datetime
.
date
(
2017
,
9
,
1
)],
[
datetime
.
date
(
2017
,
10
,
1
)]]
# db_names = ['new_transaction',]
# date_report_list = [[datetime.date(2017, 10, 1)],]
for
i
in
xrange
(
len
(
db_names
)):
...
...
@@ -141,7 +150,8 @@ if __name__ == '__main__':
# #######################
# 查文件
df_loan
=
pd
.
read_csv
(
os
.
path
.
join
(
file_path
,
'all_loan_
%
s.csv'
%
db_name
),
encoding
=
'utf8'
,
low_memory
=
False
)
df_ref
=
pd
.
read_csv
(
os
.
path
.
join
(
file_path
,
'all_ref_
%
s.csv'
%
db_name
),
encoding
=
'utf8'
,
low_memory
=
False
)
df_ref
=
pd
.
read_csv
(
os
.
path
.
join
(
file_path
,
'all_ref_
%
s.txt'
%
db_name
),
sep
=
'
\t
'
,
low_memory
=
False
)
df_ref
.
columns
=
[
u'ref_id'
,
u'实还总额'
,
u'实还本金'
,
u'实还利息'
,
u'实还服务费(减免后)'
,
u'实还罚息'
,
u'其他收益'
,
u'repaid_at'
]
df_loan
[
u'放款日期'
]
=
pd
.
to_datetime
(
df_loan
[
u'放款日期'
])
.
dt
.
date
df_loan
.
loc
[
df_loan
[
u'风险等级'
]
==
'D'
,
u'风险等级'
]
=
'C'
...
...
审计第一阶段/现金贷/收入确认表-读数据.py
View file @
791b0400
...
...
@@ -25,9 +25,9 @@ step = 10000
max_limit
=
900000
pd
.
options
.
mode
.
chained_assignment
=
None
pd
.
set_option
(
'display.float_format'
,
lambda
x
:
'
%.5
f'
%
x
)
db_names
=
[
'a
ll_back_0630'
,
'all_back_0731'
,
'all_back_0831
'
]
end_dates
=
[
datetime
.
date
(
2017
,
7
,
1
)
,
datetime
.
date
(
2017
,
8
,
1
),
datetime
.
date
(
2017
,
9
,
1
)
]
file_path
=
u'E:/审计出表/现金贷/
0926
'
db_names
=
[
'a
udit_temp
'
]
end_dates
=
[
datetime
.
date
(
2017
,
7
,
1
)]
file_path
=
u'E:/审计出表/现金贷/
1021
'
if
not
os
.
path
.
exists
(
file_path
):
os
.
makedirs
(
file_path
)
...
...
@@ -66,7 +66,7 @@ SELECT
xrprrr.punish - xrprrr.mitigate_collection_relief 实还罚息,
xrprrr.remain_income 其他收益,
DATE(xrprrrrd.repaid_at) `repaid_at`
FROM
%
s.xjd_repay_plan_repay_record_ref xrprrr
FROM
%
s.xjd_repay_plan_repay_record_ref
_0630
xrprrr
JOIN test_mzh.xjd_repay_plan_repay_record_ref_repay_date_flow xrprrrrd ON xrprrr.id = xrprrrrd.xjd_ref_id
JOIN
%
s.user_repayment_plan urp ON urp.id = xrprrr.plan_id
WHERE xrprrrrd.repaid_at < '
%
s';
...
...
@@ -116,13 +116,13 @@ for i in xrange(0, len(db_names)):
end_date
=
end_dates
[
i
]
# ##############################################
# 查数据库,并保存文件,时间较长,服务器上在15分钟左右,本机25分钟左右
df_gen
=
pd
.
read_sql
(
sql
=
sql_loan_plan
%
(
db_name
,
end_date
,),
con
=
mysql
,
chunksize
=
step
)
res
=
[]
for
tmp
in
df_gen
:
res
.
append
(
tmp
)
df_loan
=
pd
.
concat
(
res
)
df_loan
[
'qg_one_time_service_fee'
]
.
fillna
(
0
,
inplace
=
True
)
df_loan
.
to_csv
(
os
.
path
.
join
(
file_path
,
'all_loan_
%
s.csv'
%
db_name
),
encoding
=
'utf8'
,
index
=
None
)
#
df_gen = pd.read_sql(sql=sql_loan_plan % (db_name, end_date,), con=mysql, chunksize=step)
#
res = []
#
for tmp in df_gen:
#
res.append(tmp)
#
df_loan = pd.concat(res)
#
df_loan['qg_one_time_service_fee'].fillna(0, inplace=True)
#
df_loan.to_csv(os.path.join(file_path, 'all_loan_%s.csv' % db_name), encoding='utf8', index=None)
df_gen
=
pd
.
read_sql
(
sql
=
sql_ref
%
(
db_name
,
db_name
,
end_date
,),
con
=
mysql
,
chunksize
=
step
)
res
=
[]
...
...
@@ -131,9 +131,9 @@ for i in xrange(0, len(db_names)):
df_ref
=
pd
.
concat
(
res
)
df_ref
.
to_csv
(
os
.
path
.
join
(
file_path
,
'all_ref_
%
s.csv'
%
db_name
),
encoding
=
'utf8'
,
index
=
None
)
df_ms
=
pd
.
read_sql
(
sql
=
sql_ms
,
con
=
mysql
)
df_ms
.
to_csv
(
os
.
path
.
join
(
file_path
,
'ms.csv'
),
encoding
=
'utf8'
,
index
=
None
)
df_yghs
=
pd
.
read_sql
(
sql
=
sql_yghs
,
con
=
mysql
)
df_yghs
.
to_csv
(
os
.
path
.
join
(
file_path
,
'yghs.csv'
),
encoding
=
'utf8'
,
index
=
None
)
df_dunjiao
=
pd
.
read_sql
(
sql
=
sql_dunjiao
,
con
=
mysql
)
df_dunjiao
.
to_csv
(
os
.
path
.
join
(
file_path
,
'dunjiao.csv'
),
encoding
=
'utf8'
,
index
=
None
)
#
df_ms = pd.read_sql(sql=sql_ms, con=mysql)
#
df_ms.to_csv(os.path.join(file_path, 'ms.csv'), encoding='utf8', index=None)
#
df_yghs = pd.read_sql(sql=sql_yghs, con=mysql)
#
df_yghs.to_csv(os.path.join(file_path, 'yghs.csv'), encoding='utf8', index=None)
#
df_dunjiao = pd.read_sql(sql=sql_dunjiao, con=mysql)
#
df_dunjiao.to_csv(os.path.join(file_path, 'dunjiao.csv'), encoding='utf8', index=None)
审计第一阶段/白条/去哪儿/去哪儿收入确认表-模型.py
View file @
791b0400
...
...
@@ -100,7 +100,7 @@ def cal_debt_age(x):
df_res
[
'账龄'
]
=
df_res
[[
'放款时间'
,
'应还款日'
]]
.
apply
(
lambda
x
:
cal_debt_age
(
x
),
axis
=
1
)
asset_remain
=
df_res
.
loc
[
df_res
.
assets_remain
>
0
]
asset_remain
=
df_res
.
loc
[
df_res
.
assets_remain
!=
0
]
asset_remain
[[
'产品编号'
,
'风险等级'
,
'放款时间'
,
'应还服务费'
,
'assets_remain'
]]
.
to_excel
(
os
.
path
.
join
(
file_path
,
'去哪儿asset_remain.xlsx'
),
index
=
None
)
...
...
@@ -111,12 +111,12 @@ gp1.to_excel(os.path.join(file_path, '去哪儿汇总(期数-应还年月-风
gp2
=
df_res
.
groupby
([
'账龄'
,
'应还年月'
,
'风险等级'
])[
'assets'
]
.
agg
(
'sum'
)
.
reset_index
()
gp2
.
to_excel
(
os
.
path
.
join
(
file_path
,
'去哪儿汇总(账龄-应还年月-风险等级).xlsx'
),
index
=
None
)
#
df_res.to_csv(os.path.join(file_path, '去哪儿assets减值-details.csv'), index=None, encoding='gbk')
df_res
.
to_csv
(
os
.
path
.
join
(
file_path
,
'去哪儿assets减值-details.csv'
),
index
=
None
,
encoding
=
'gbk'
)
#
# max_limit = 900000
# if len(df_res) > max_limit:
# for i in xrange(0, len(df_res), max_limit):
# df_res[i:i + max_limit].to_csv(os.path.join(file_path, '去哪儿assets减值-details_%d.csv' % (i / max_limit + 1)), index=None,
# encoding='utf8')
#
df_res.loc[df_res['产品编号'].duplicated(), '放款金额'] = 0
#
print('{}'.format(df_res['放款金额'].sum()))
df_res
.
loc
[
df_res
[
'产品编号'
]
.
duplicated
(),
'放款金额'
]
=
0
print
(
'{}'
.
format
(
df_res
[
'放款金额'
]
.
sum
()))
审计第一阶段/白条/去哪儿/去哪儿收入确认表.py
View file @
791b0400
...
...
@@ -90,7 +90,7 @@ df_loan['计提风险金'] = 0
view_dates
=
[
datetime
.
date
(
2017
,
9
,
1
),
datetime
.
date
(
2017
,
10
,
1
)]
file_path
=
'E:/审计出表/白条/收入确认表'
res_sum
=
[]
for
v_date
in
view_dates
:
last_day
=
v_date
+
datetime
.
timedelta
(
days
=-
1
)
df_loan_v
=
df_loan
.
loc
[
df_loan
[
'放款时间'
]
<
v_date
]
...
...
@@ -141,7 +141,13 @@ for v_date in view_dates:
df_loan_v
.
drop
([
'减值'
,
'退款服务费'
],
axis
=
1
,
inplace
=
True
)
df_loan_v
[
'放款时间'
]
=
df_loan_v
[
'放款时间'
]
.
dt
.
date
print
(
df_loan_v
[[
'放款金额'
,
'计提风险金'
,
'实还服务费'
,
'assets'
,
'收入'
]]
.
sum
())
print
(
df_loan_v
.
groupby
([
'风险等级'
])
.
agg
(
'sum'
))
# tmp_sum = df_loan_v.groupby(u'风险等级')[
# [u'放款金额', u'计提风险金', u'实还服务费', u'assets', u'收入']].agg('sum').reset_index()
tmp_sum
=
df_loan_v
[[
u'放款金额'
,
u'计提风险金'
,
u'实还服务费'
,
u'assets'
,
u'收入'
]]
.
agg
(
'sum'
)
.
to_frame
()
.
T
tmp_sum
[
u'截止时间'
]
=
v_date
res_sum
.
append
(
tmp_sum
)
df_loan_v
.
to_csv
(
os
.
path
.
join
(
file_path
,
'白条去哪儿收入确认表_{}_{:0>2}.csv'
.
format
(
v_date
.
year
,
v_date
.
month
-
1
)),
index
=
None
,
encoding
=
'gbk'
)
df_sum
=
pd
.
concat
(
res_sum
)
df_sum
.
to_excel
(
os
.
path
.
join
(
file_path
,
'白条去哪儿汇总.xlsx'
),
index
=
None
)
\ No newline at end of file
审计第一阶段/白条/非去哪儿/非去哪儿收入确认表-模型.py
View file @
791b0400
...
...
@@ -149,9 +149,9 @@ def cal_debt_age(x):
df_res
[
'账龄'
]
=
df_res
[[
'放款时间'
,
'应还日'
]]
.
apply
(
lambda
x
:
cal_debt_age
(
x
),
axis
=
1
)
# asset_remain = df_res.loc[df_res.assets_remain >
0]
#
asset_remain[['产品编号', '风险等级', '放款时间', '应还服务费', 'assets_remain']].to_excel(os.path.join(file_path, '非去哪儿asset_remain.xlsx'),
#
index=None)
asset_remain
=
df_res
.
loc
[
df_res
.
assets_remain
!=
0
]
asset_remain
[[
'产品编号'
,
'风险等级'
,
'放款时间'
,
'应还服务费'
,
'assets_remain'
]]
.
to_excel
(
os
.
path
.
join
(
file_path
,
'非去哪儿asset_remain.xlsx'
),
index
=
None
)
df_res
[
'应还年月'
]
=
df_res
[
'应还日'
]
.
apply
(
lambda
x
:
x
.
strftime
(
'
%
Y-
%
m'
))
gp1
=
df_res
.
groupby
([
'当前期数'
,
'应还年月'
,
'风险等级'
])[
'assets'
]
.
agg
(
'sum'
)
.
reset_index
()
...
...
@@ -160,7 +160,7 @@ gp1.to_excel(os.path.join(file_path, '非去哪儿汇总(期数-应还年月-
gp2
=
df_res
.
groupby
([
'账龄'
,
'应还年月'
,
'风险等级'
])[
'assets'
]
.
agg
(
'sum'
)
.
reset_index
()
gp2
.
to_excel
(
os
.
path
.
join
(
file_path
,
'非去哪儿汇总(账龄-应还年月-风险等级).xlsx'
),
index
=
None
)
#
#
df_res.to_csv(os.path.join(file_path, '非去哪儿assets减值-details.csv'), index=None, encoding='gbk')
df_res
.
to_csv
(
os
.
path
.
join
(
file_path
,
'非去哪儿assets减值-details.csv'
),
index
=
None
,
encoding
=
'gbk'
)
#
# max_limit = 900000
# for i in xrange(0, len(df_res), max_limit):
...
...
审计第一阶段/白条/非去哪儿/非去哪儿收入确认表.py
View file @
791b0400
...
...
@@ -102,6 +102,7 @@ print(df_repay.groupby(df_repay['还款时间'].dt.month)['实还服务费'].agg
view_dates
=
[
datetime
.
date
(
2017
,
9
,
1
),
datetime
.
date
(
2017
,
10
,
1
)]
file_path
=
'E:/审计出表/白条/收入确认表'
res_sum
=
[]
for
v_date
in
view_dates
:
df_loan_v
=
df_loan
.
loc
[
df_loan
[
'放款时间'
]
<
v_date
]
df_repay_v
=
df_repay
.
loc
[
df_repay
[
'还款时间'
]
<
v_date
]
...
...
@@ -132,8 +133,17 @@ for v_date in view_dates:
df_loan_v
.
drop
([
'ref_id'
,
'减值'
],
axis
=
1
,
inplace
=
True
)
df_loan_v
[
'放款时间'
]
=
df_loan_v
[
'放款时间'
]
.
dt
.
date
print
(
df_loan_v
[[
'放款金额'
,
'计提风险金'
,
'实还服务费'
,
'assets'
,
'收入'
]]
.
sum
())
# df_loan_v.to_csv(os.path.join(file_path, '白条收入确认表_{}_{:0>2}.csv'.format(v_date.year, v_date.month - 1)), index=None, encoding='gbk')
# tmp_sum = df_loan_v.groupby(u'风险等级')[
# [u'放款金额', u'计提风险金', u'实还服务费', u'assets', u'收入']].agg('sum').reset_index()
tmp_sum
=
df_loan_v
[[
u'放款金额'
,
u'计提风险金'
,
u'实还服务费'
,
u'assets'
,
u'收入'
]]
.
agg
(
'sum'
)
.
to_frame
()
.
T
tmp_sum
[
u'截止时间'
]
=
v_date
res_sum
.
append
(
tmp_sum
)
df_loan_v
.
to_csv
(
os
.
path
.
join
(
file_path
,
'白条收入确认表_{}_{:0>2}.csv'
.
format
(
v_date
.
year
,
v_date
.
month
-
1
)),
index
=
None
,
encoding
=
'gbk'
)
df_sum
=
pd
.
concat
(
res_sum
)
df_sum
.
to_excel
(
os
.
path
.
join
(
file_path
,
'白条非去哪儿汇总.xlsx'
),
index
=
None
)
# 98108, 98108
# ()
...
...
@@ -144,4 +154,4 @@ for v_date in view_dates:
# 0 8 8561.64000
# 1 9 559489.81000
# 有还款,没有放款的len: 0,ref_id: ()
# 有还款,没有放款的len: 0,ref_id: ()
\ No newline at end of file
# 有还款,没有放款的len: 0,ref_id: ()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment