Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
dda80e02
Commit
dda80e02
authored
Apr 22, 2019
by
linfang.wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dhb 待调试
parent
a132f117
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
39 additions
and
54 deletions
+39
-54
matplot.py
data/graph/matplot.py
+0
-8
dhb.py
mvp/dhb.py
+39
-46
No files found.
data/graph/matplot.py
View file @
dda80e02
...
...
@@ -103,14 +103,6 @@ def density_chart(dataset,title):
plt
.
title
(
title
)
plt
.
show
()
def
learning_curve
():
def
pdp_chart
():
return
1
def
uniVarChart
():
return
1
...
...
mvp/dhb.py
View file @
dda80e02
import
pandas
as
pd
import
mysqldb
from
data.datasource
import
mysqldb
,
mongodb
import
datetime
import
dateutil
'''
model instructions : established a dhb obj which cotains attrubutes of dhb model
...
...
@@ -176,24 +177,11 @@ class dhb():
'dhb_overview_ntdun_call_total_duration'
,
'dhb_overview_ntdun_call_total_times'
,
'dhb_overview_ntdun_first_call_time'
]
'''
instructions :
build a constructor of dhb
Params:
'''
# ime period set as default
start_time_period
=
(
datetime
.
date
.
today
()
-
relativedelta
(
months
=+
7
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
end_time_period
=
(
datetime
.
date
.
today
()
-
relativedelta
(
days
=+
16
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
def
__init__
(
self
,
overdue_days
=
15
,
features
=
None
,
sql
=
None
,
start_time_period
=
None
,
end_time_period
=
None
):
try
:
if
features
!=
None
:
self
.
features
=
features
self
.
features
=
features
if
sql
!=
None
:
self
.
sql
=
sql
else
:
...
...
@@ -207,41 +195,46 @@ Params:
if
start_time_period
!=
None
:
self
.
start_time_period
=
start_time_period
else
:
self
.
start_time_period
=
(
datetime
.
date
.
today
()
-
dateutil
.
relativedelta
(
months
=+
7
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
if
end_time_period
!=
None
:
self
.
end_time_period
=
end_time_period
else
:
self
.
end_time_period
=
(
datetime
.
date
.
today
()
-
dateutil
.
relativedelta
(
days
=+
16
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
except
Exception
as
e
:
print
(
'Parameters Error:
\n
'
,
e
)
'''
instrucions : extract dhb features from risk_analysis
Params : nothing yet
returns : dhb features
'''
def
dhb_features_extract
(
self
):
'''
instrucions : extract dhb features from risk_analysis
:param self:
:return: dhb features
'''
value_map
=
{
"近3天"
:
1
,
"近4-5天"
:
2
,
"近6-7天"
:
3
,
"近8-15天"
:
4
,
"近16-30天"
:
5
,
"近31-60天"
:
6
,
"近61-90天"
:
7
,
"近91-120天"
:
8
,
"近121-150天"
:
9
,
"近151-180天"
:
10
,
"180天前"
:
11
,
"无"
:
0
}
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
"近3天"
:
1
,
"近4-5天"
:
2
,
"近6-7天"
:
3
,
"近8-15天"
:
4
,
"近16-30天"
:
5
,
"近31-60天"
:
6
,
"近61-90天"
:
7
,
"近91-120天"
:
8
,
"近121-150天"
:
9
,
"近151-180天"
:
10
,
"180天前"
:
11
,
"无"
:
0
}
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
# use risk_analysis to extract data
dhb_loan
=
query_sql
(
self
.
sql
.
replace
(
'@start_time_period'
,
self
.
start_time_period
)
.
replace
(
'@end_time_period'
,
self
.
end_time_period
))
# dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
sql
=
self
.
sql
.
replace
(
'@start_time_period'
,
self
.
start_time_period
)
.
replace
(
'@end_time_period'
,
self
.
end_time_period
)
#
dhb_loan
=
pd
.
read_sql
(
sql
,
mysqldb
.
engine_risk_analysis
)
# dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
# manipul category datatype which includes sequences
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
=
pd
.
get_dummies
(
d
f
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]],
columns
=
[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
])
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
=
pd
.
get_dummies
(
d
hb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]],
columns
=
[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
])
# limit the upper boundary
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_60_and_90_days_ntdun_call_avg_duration
>=
42
,
"dhb_last_60_and_90_days_ntdun_call_avg_duration"
]
=
42
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_ntdun_call_duration_above60
>=
25
,
"dhb_overview_ntdun_call_duration_above60"
]
=
25
...
...
@@ -255,15 +248,15 @@ Params:
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_60_and_90_days_ntdun_call_in_duration
>=
1000
,
"dhb_last_60_and_90_days_ntdun_call_in_duration"
]
=
1000
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_dun_call_tel_total_nums
>=
22
,
"dhb_overview_dun_call_tel_total_nums"
]
=
22
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_30_days_dun_call_total_duration
>=
1100
,
"dhb_last_30_days_dun_call_total_duration"
]
=
1100
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_two_weeks_ntdun_call_in_duration
>=
300
,
"dhb_last_two_weeks_ntdun_call_in_duration"
]
=
300
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_two_weeks_ntdun_call_in_duration
>=
300
,
"dhb_last_two_weeks_ntdun_call_in_duration"
]
=
300
dhb_loan
.
to_csv
(
"./dhb_loan_sample——"
+
str
(
datetime
.
date
.
today
())
+
".csv"
)
print
(
time
.
strftime
(
'
%
Y.
%
m.
%
d
%
H:
%
M:
%
S'
,
time
.
localtime
(
time
.
time
()))
+
"提取了dhb {}+ "
.
format
(
str
(
overdue_days
))
+
self
.
start_time_period
+
"to"
+
self
.
end_time_period
+
"时段样本"
)
print
(
datetime
.
time
.
strftime
(
'
%
Y.
%
m.
%
d
%
H:
%
M:
%
S'
,
datetime
.
time
.
localtime
(
datetime
.
time
.
time
()))
+
"提取了dhb {}+ "
.
format
(
str
(
self
.
overdue_days
))
+
self
.
start_time_period
+
"to"
+
self
.
end_time_period
+
"时段样本"
)
return
dhb_loan
def
dhb_comparasion
(
df
=
None
,
start_time_period
=
self
.
start_time_period
,
end_time_period
=
self
.
end_time_period
,
applied_type
=
None
,
applied_from
=
None
):
df_mongo
=
pymongodb
(
start_time_period
,
end_time_period
,
limit
,
"{'order_id':1,'model_exec_data_source#dhb':1}"
)
def
dhb_comparasion
(
self
,
limit
=
"{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
,
df
=
None
,
applied_type
=
None
,
applied_from
=
None
):
df_mongo
=
mongodb
.
pymongodb
(
self
.
start_time_period
,
self
.
end_time_period
,
limit
,
"{'order_id':1,'model_exec_data_source#dhb':1}"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment