Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
a33add70
Commit
a33add70
authored
Apr 22, 2019
by
linfang.wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
大数据查询
parent
45721de0
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
24 additions
and
3 deletions
+24
-3
allocator.py
mvp/allocator.py
+1
-1
dhb.py
mvp/dhb.py
+23
-2
No files found.
mvp/allocator.py
View file @
a33add70
...
...
@@ -47,7 +47,7 @@ if __name__ == '__main__':
# 'third_data_source#xy_pan_newqueryAorgAcount',
# 'third_data_source#xy_pan_newqueryAsumAcount'
# ]
dhb
=
dhb
.
dhb
()
dhb
=
dhb
.
dhb
(
start_time_period
=
'2019-01-19 11:00:00'
,
end_time_period
=
'2019-01-20 12:00:00'
)
df
=
dhb
.
dhb_features_extract
()
label
=
'target'
# df=pd.read_csv('test.csv')
...
...
mvp/dhb.py
View file @
a33add70
...
...
@@ -236,8 +236,19 @@ class dhb:
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
# use risk_analysis to extract data
dhb_loan
=
pd
.
read_sql
(
self
.
sql
.
replace
(
'@start_time_period'
,
self
.
start_time_period
)
.
replace
(
'@end_time_period'
,
self
.
end_time_period
),
mysqldb
.
engine_risk_analysis
)
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
=
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
.
applymap
(
lambda
x
:
value_map
[
x
])
print
(
'-----get dhb features from risk_analysis---'
,
datetime
.
datetime
.
now
())
cnt
=
self
.
cnt_samples
()
print
(
'-----samples number is
%
d '
%
cnt
[
'cnt'
][
0
])
res
=
[]
tmp
=
pd
.
read_sql
(
self
.
sql
.
replace
(
'@start_time_period'
,
self
.
start_time_period
)
.
replace
(
'@end_time_period'
,
self
.
end_time_period
),
mysqldb
.
engine_risk_analysis
,
chunksize
=
10000
)
for
tt
in
tmp
:
res
.
append
(
tt
)
dhb_loan
=
pd
.
concat
(
res
)
cols
=
[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]
dhb_loan
[
cols
]
=
dhb_loan
[
cols
]
.
applymap
(
lambda
x
:
value_map
[
x
])
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_60_and_90_days_ntdun_call_avg_duration
>=
42
,
"dhb_last_60_and_90_days_ntdun_call_avg_duration"
]
=
42
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_ntdun_call_duration_above60
>=
25
,
"dhb_overview_ntdun_call_duration_above60"
]
=
25
...
...
@@ -280,6 +291,16 @@ class dhb:
# df['bins'] = df.qcut(df['target'], q = 10, percision = 6, dupulicates='drop')
# df.groupby
# return 1
def cnt_samples(self):
    """Count the ``risk_analysis`` rows inside the configured time window.

    The query text is produced by substituting ``self.start_time_period``
    and ``self.end_time_period`` into the two ``applied_at`` bounds (lower
    bound ``>=``, upper bound ``<``). The remaining filters
    (``transacted = 1``, ``dhb_flag =1`` and
    ``datediff(now(),deadline) > 15``) are fixed in the SQL text.

    Returns:
        The result of ``pd.read_sql`` for that query executed against
        ``mysqldb.engine_risk_analysis``; the count is selected under the
        column alias ``cnt``.
    """
    # NOTE(review): both bounds are interpolated straight into the SQL
    # text -- confirm they never originate from untrusted input.
    window_bounds = (self.start_time_period, self.end_time_period)
    count_query_template = '''
select count(1) as cnt
from risk_analysis
where applied_at >= '%s' and applied_at < '%s'
and transacted = 1
and dhb_flag =1
and datediff(now(),deadline) > 15
'''
    count_query = count_query_template % window_bounds
    return pd.read_sql(count_query, mysqldb.engine_risk_analysis)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment