Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
ccad6faa
Commit
ccad6faa
authored
Apr 22, 2019
by
linfang.wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
加入业务数据
parent
a33add70
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
92 additions
and
0 deletions
+92
-0
mysqlquery.py
data/samples/mysqlquery.py
+14
-0
xyqbdata.py
data/samples/xyqbdata.py
+78
-0
No files found.
data/samples/mysqlquery.py
0 → 100644
View file @
ccad6faa
import
pandas
as
pd
def query(sql, engine_sql):
    '''
    Run a query that may return a large result and load it as one DataFrame.

    The result is fetched through ``pd.read_sql`` in chunks of 10000 rows and
    the chunks are concatenated once all of them have been read.

    :param sql: query handed to ``pd.read_sql``
    :param engine_sql: connection/engine the query is executed on
    :return: DataFrame built by concatenating every chunk
    '''
    chunk_iter = pd.read_sql(sql, engine_sql, chunksize=10000)
    # Drain the chunk iterator before concatenating.
    frames = list(chunk_iter)
    return pd.concat(frames)
\ No newline at end of file
data/samples/xyqbdata.py
0 → 100644
View file @
ccad6faa
import
pandas
as
pd
from
data.datasource.mysqldb
import
*
from
data.samples
import
mysqlquery
'''
目的:提供业务数据,包括 order_no,loan_id,用户类型【直接使用risk_info】,渠道,申请时间,拒绝原因,首逾天数,最大逾期天数,放款期数
'''
def query_user_loan_type(order_nos):
    '''
    Look up the user loan type recorded for each order.

    Orders are queried from ``biz_audit_log_data`` through ``engine_risk`` in
    batches of 1000. The ``user_loan_type_v3`` JSON field of
    ``audit_context_data`` is used, falling back to ``user_loan_type_v4``
    when it is NULL.

    :param order_nos: sequence of order numbers (must support ``len`` and
        slicing); an empty sequence yields an empty DataFrame
    :return: DataFrame with columns ``order_no`` and ``user_loan_type`` (the
        user type used by the strategy), de-duplicated on ``order_no``
    '''
    # len() rather than truthiness so array-like inputs are accepted as well.
    if len(order_nos) == 0:
        # pd.concat([]) raises ValueError, so return the result schema directly.
        return pd.DataFrame(columns=['order_no', 'user_loan_type'])

    sql = '''
        select biz_no as order_no,
        ifnull(JSON_EXTRACT(audit_context_data, '$.user_loan_type_v3'),JSON_EXTRACT(audit_context_data, '$.user_loan_type_v4')) as user_loan_type
        from biz_audit_log_data
        where biz_no in %s
    '''
    batch_size = 1000
    res = []
    for i in range(0, len(order_nos), batch_size):
        print('----exe sql %d---- ' % i)
        batch = order_nos[i:i + batch_size]
        # Build the IN list by hand: str(tuple(batch)) renders a one-element
        # batch as "('x',)", and that trailing comma is invalid SQL.
        # NOTE(review): values are interpolated with repr(), not bound as
        # query parameters -- only pass trusted order numbers.
        in_clause = '(%s)' % ', '.join(repr(value) for value in batch)
        res.append(pd.read_sql(sql % in_clause, engine_risk))

    df = pd.concat(res)
    # Keep a single row per order.
    df.drop_duplicates(['order_no'], inplace=True)
    return df
def query_byloanid(loan_ids):
    '''
    Fetch application and repayment data for a set of loans.

    The data source is the analysis database (``engine_analysis_new``).
    ``loan_application`` is left-joined to ``loan_repay`` (rows with
    ``repayment_status`` 4 excluded) and queried in batches of 1000 loan ids.

    :param loan_ids: sequence of loan ids (must support ``len`` and slicing);
        an empty sequence yields an empty DataFrame
    :return: DataFrame de-duplicated on ``loan_id`` with columns ``loan_id``,
        ``user_id``, ``order_no``, ``contract_term``, ``passdue_day``
        (overdue days of term 1, NULL for other terms), ``applied_at``,
        ``applied_type``, ``applied_channel``, ``reason`` (disbursed /
        approved / not approved) and ``max_passdue_day``
    '''
    columns = [
        'loan_id', 'user_id', 'order_no', 'contract_term', 'passdue_day',
        'applied_at', 'applied_type', 'applied_channel', 'reason',
        'max_passdue_day',
    ]
    # len() rather than truthiness so array-like inputs are accepted as well.
    if len(loan_ids) == 0:
        # pd.concat([]) raises ValueError, so return the result schema directly.
        return pd.DataFrame(columns=columns)

    # NOTE(review): the query uses "==" for equality; MySQL only accepts "=".
    # Left unchanged because the analysis database's SQL dialect is not
    # visible here -- confirm.
    # NOTE(review): nine non-aggregated columns are selected but only eight
    # are grouped ("reason" is not) -- confirm this is intended.
    sql = '''
        select t1.loan_id,t1.user_id,t2.order_no,t2.contract_term,if(t2.term_no==1,t2.passdue_day,null) as passdue_day,
        t1.applied_at,t1.applied_type,t1.applied_channel,if(t2.loan_id is not null,'已放款',if(t1.approval==1,'审核通过','审核未通过')) as reason,
        max(t2.passdue_day) as max_passdue_day
        from loan_application t1
        left join loan_repay t2 on t1.loan_id=t2.loan_id and t2.repayment_status!=4
        where t1.loan_id in %s
        group by 1,2,3,4,5,6,7,8
    '''
    batch_size = 1000
    res = []
    for i in range(0, len(loan_ids), batch_size):
        print('----exe sql %d---- ' % i)
        batch = loan_ids[i:i + batch_size]
        # Build the IN list by hand: str(tuple(batch)) renders a one-element
        # batch as "(1,)", and that trailing comma is invalid SQL.
        # NOTE(review): values are interpolated with repr(), not bound as
        # query parameters -- only pass trusted loan ids.
        in_clause = '(%s)' % ', '.join(repr(value) for value in batch)
        res.append(pd.read_sql(sql % in_clause, engine_analysis_new))

    df = pd.concat(res)
    # order_no comes from the left-joined table, so it is NULL for loans with
    # no repay rows; decode only real byte strings instead of calling
    # .decode() on None (or on values that are already str).
    df['order_no'] = df['order_no'].apply(
        lambda x: x.decode('utf8') if isinstance(x, bytes) else x
    )
    # == drop duplicated rows: keep one row per loan_id
    df.sort_values(['loan_id'], ascending=True, inplace=True)
    df.drop_duplicates(['loan_id'], keep='last', inplace=True)
    return df
def query_by_orderno(order_nos):
    '''
    Fetch application and repayment data for a set of orders.

    The data source is the analysis database (``engine_analysis_new``).
    ``loan_application`` is left-joined to ``loan_repay`` (rows with
    ``repayment_status`` 4 excluded), filtered on
    ``loan_application.order_no`` and queried in batches of 1000 order
    numbers.

    :param order_nos: sequence of order numbers (must support ``len`` and
        slicing); an empty sequence yields an empty DataFrame
    :return: DataFrame de-duplicated on ``loan_id`` with columns ``loan_id``,
        ``user_id``, ``order_no``, ``contract_term``, ``passdue_day``
        (overdue days of term 1, NULL for other terms), ``applied_at``,
        ``applied_type``, ``applied_channel``, ``reason`` (disbursed /
        approved / not approved) and ``max_passdue_day``
    '''
    columns = [
        'loan_id', 'user_id', 'order_no', 'contract_term', 'passdue_day',
        'applied_at', 'applied_type', 'applied_channel', 'reason',
        'max_passdue_day',
    ]
    # len() rather than truthiness so array-like inputs are accepted as well.
    if len(order_nos) == 0:
        # pd.concat([]) raises ValueError, so return the result schema directly.
        return pd.DataFrame(columns=columns)

    # NOTE(review): the query uses "==" for equality; MySQL only accepts "=".
    # Left unchanged because the analysis database's SQL dialect is not
    # visible here -- confirm.
    # NOTE(review): nine non-aggregated columns are selected but only eight
    # are grouped ("reason" is not) -- confirm this is intended.
    sql = '''
        select t1.loan_id,t1.user_id,t2.order_no,t2.contract_term,if(t2.term_no==1,t2.passdue_day,null) as passdue_day,
        t1.applied_at,t1.applied_type,t1.applied_channel,if(t2.loan_id is not null,'已放款',if(t1.approval==1,'审核通过','审核未通过')) as reason,
        max(t2.passdue_day) as max_passdue_day
        from loan_application t1
        left join loan_repay t2 on t1.loan_id=t2.loan_id and t2.repayment_status!=4
        where t1.order_no in %s
        group by 1,2,3,4,5,6,7,8
    '''
    batch_size = 1000
    res = []
    for i in range(0, len(order_nos), batch_size):
        print('----exe sql %d---- ' % i)
        batch = order_nos[i:i + batch_size]
        # Build the IN list by hand: str(tuple(batch)) renders a one-element
        # batch as "('x',)", and that trailing comma is invalid SQL.
        # NOTE(review): values are interpolated with repr(), not bound as
        # query parameters -- only pass trusted order numbers.
        in_clause = '(%s)' % ', '.join(repr(value) for value in batch)
        res.append(pd.read_sql(sql % in_clause, engine_analysis_new))

    df = pd.concat(res)
    # The selected order_no comes from the left-joined table, so it is NULL
    # for loans with no repay rows; decode only real byte strings instead of
    # calling .decode() on None (or on values that are already str).
    df['order_no'] = df['order_no'].apply(
        lambda x: x.decode('utf8') if isinstance(x, bytes) else x
    )
    # == drop duplicated rows: keep one row per loan_id
    df.sort_values(['loan_id'], ascending=True, inplace=True)
    df.drop_duplicates(['loan_id'], keep='last', inplace=True)
    return df
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment