Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
45721de0
Commit
45721de0
authored
Apr 22, 2019
by
王家华
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
debug
parent
b5a3f366
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
293 additions
and
214 deletions
+293
-214
__init__.cpython-37.pyc
models/__pycache__/__init__.cpython-37.pyc
+0
-0
xgboost.cpython-37.pyc
models/__pycache__/xgboost.cpython-37.pyc
+0
-0
lightgbm.py
models/lightgbm.py
+51
-1
__init__.cpython-37.pyc
mvp/__pycache__/__init__.cpython-37.pyc
+0
-0
dhb.cpython-37.pyc
mvp/__pycache__/dhb.cpython-37.pyc
+0
-0
xgbreport.cpython-37.pyc
mvp/__pycache__/xgbreport.cpython-37.pyc
+0
-0
dhb.py
mvp/dhb.py
+240
-210
lgbreport.py
mvp/lgbreport.py
+2
-3
No files found.
models/__pycache__/__init__.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
models/__pycache__/xgboost.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
models/lightgbm.py
View file @
45721de0
import
lightgbm
as
lgb
import
lightgbm
as
lgb
from
sklearn.metrics
import
roc_auc_score
from
sklearn.metrics
import
roc_auc_score
from
sklearn.model_selection
import
GridSearchCV
from
sklearn.metrics
import
confusion_matrix
,
mean_squared_error
from
sklearn.metrics
import
confusion_matrix
,
mean_squared_error
import
numpy
import
numpy
import
pandas
import
pandas
...
@@ -21,8 +22,9 @@ params = {
...
@@ -21,8 +22,9 @@ params = {
'verbose'
:
1
# <0 显示致命的, =0 显示错误 (警告), >0 显示信息
'verbose'
:
1
# <0 显示致命的, =0 显示错误 (警告), >0 显示信息
}
}
'''
'''
instructions : training lightgbm model with specified params
instructions : training lightgbm model with specified params
Parameters :
Parameters :
...
@@ -33,5 +35,53 @@ Parameters :
...
@@ -33,5 +35,53 @@ Parameters :
'''
'''
def
lgb_train
(
params
,
training_set
,
features
,
target
):
def
lgb_train
(
params
,
training_set
,
features
,
target
):
lgb_train
=
lgb
.
Dataset
(
training_set
[
features
],
training_set
[
target
])
lgb_train
=
lgb
.
Dataset
(
training_set
[
features
],
training_set
[
target
])
#lgb.train(params,)
return
1
'''
instructions : build a lgb classifier
Params :
'''
def
buildClf
(
params
):
return
lgb
.
LGBMClassifier
(
params
)
'''
'''
def
automodelfit
(
clf
,
param_grid
,
dftrain
,
features
,
resp
,
kfold
=
10
,
scoring
=
'roc_auc'
):
# kflod=StratifiedKFold(n_splits=kfold,shuffle=True,random_state=7)
grid_search
=
GridSearchCV
(
clf
,
param_grid
,
scoring
=
scoring
,
n_jobs
=
2
,
cv
=
kfold
,
verbose
=
2
,
iid
=
True
,
refit
=
True
)
#== 模型训练
grid_search
.
fit
(
dftrain
[
features
],
dftrain
[
resp
])
#== 获取最优参数
return
grid_search
def
modelfit
(
clf
,
dftrain
,
features
,
resp
,
useTrainCV
=
True
,
kfold
=
10
,
eval_metric
=
'auc'
,
early_stopping_rounds
=
20
):
'''
模型训练
:type useTrainCV: object
:param clf:XGBClassifier
:param dftrain:训练集
:param features: 特征
:param resp:label
:param useTrainCV:if True call cv function,目的是调节参数 n_estimators
:param cv_folds: N 折交叉验证
:param early_stopping_rounds:添加数loss变化不大这个状态持续的轮数,达到这个数就退出训练过程
:param eval_metric 同 目标函数 objective 有关,取值https://xgboost.readthedocs.io/en/latest/python/python_api.html#
:return:
'''
if
useTrainCV
:
# kflod = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=7)
xgb_param
=
clf
.
get_xgb_params
()
xgtrain
=
lgb
.
DMatrix
(
dftrain
[
features
]
.
values
,
label
=
dftrain
[
resp
]
.
values
)
cvresult
=
lgb
.
cv
(
xgb_param
,
xgtrain
,
num_boost_round
=
clf
.
get_params
()[
'n_estimators'
],
nfold
=
kfold
,
metrics
=
eval_metric
,
early_stopping_rounds
=
early_stopping_rounds
,
verbose_eval
=
True
)
clf
.
set_params
(
n_estimators
=
cvresult
.
shape
[
0
])
clf
.
fit
(
dftrain
[
features
],
dftrain
[
resp
],
eval_metric
=
eval_metric
)
return
clf
mvp/__pycache__/__init__.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
mvp/__pycache__/dhb.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
mvp/__pycache__/xgbreport.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
mvp/dhb.py
View file @
45721de0
import
pandas
as
pd
import
pandas
as
pd
from
data.datasource
import
mysqldb
,
mongodb
from
data.datasource
import
mysqldb
,
mongodb
import
time
from
dateutil.relativedelta
import
relativedelta
import
datetime
import
datetime
import
dateutil
'''
'''
model instructions : established a dhb obj which cotains attrubutes of dhb model
model instructions : established a dhb obj which cotains attrubutes of dhb model
...
@@ -20,222 +22,223 @@ API :
...
@@ -20,222 +22,223 @@ API :
class
dhb
:
class
dhb
:
# features as Series format
# features as Series format
features
=
[
'dhb_last_30_and_60_days_dun_call_avg_duration'
,
#features = pd.read_excel()
'dhb_last_30_and_60_days_dun_call_duration_above60'
,
'dhb_last_30_and_60_days_dun_call_duration_below15'
,
sql
=
'''
'dhb_last_30_and_60_days_dun_call_duration_between15_and_30'
,
select dhb_last_30_and_60_days_dun_call_avg_duration,
'dhb_last_30_and_60_days_dun_call_in_duration'
,
dhb_last_30_and_60_days_dun_call_duration_above60,
'dhb_last_30_and_60_days_dun_call_in_times'
,
dhb_last_30_and_60_days_dun_call_duration_below15,
'dhb_last_30_and_60_days_dun_call_out_duration'
,
dhb_last_30_and_60_days_dun_call_duration_between15_and_30,
'dhb_last_30_and_60_days_dun_call_out_times'
,
dhb_last_30_and_60_days_dun_call_in_duration,
'dhb_last_30_and_60_days_dun_call_tel_total_nums'
,
dhb_last_30_and_60_days_dun_call_in_times,
'dhb_last_30_and_60_days_dun_call_total_duration'
,
dhb_last_30_and_60_days_dun_call_out_duration,
'dhb_last_30_and_60_days_dun_call_total_times'
,
dhb_last_30_and_60_days_dun_call_out_times,
'dhb_last_30_and_60_days_ntdun_call_avg_duration'
,
dhb_last_30_and_60_days_dun_call_tel_total_nums,
'dhb_last_30_and_60_days_ntdun_call_duration_above60'
,
dhb_last_30_and_60_days_dun_call_total_duration,
'dhb_last_30_and_60_days_ntdun_call_duration_below15'
,
dhb_last_30_and_60_days_dun_call_total_times,
'dhb_last_30_and_60_days_ntdun_call_duration_between15_and_30'
,
dhb_last_30_and_60_days_ntdun_call_avg_duration,
'dhb_last_30_and_60_days_ntdun_call_in_duration'
,
dhb_last_30_and_60_days_ntdun_call_duration_above60,
'dhb_last_30_and_60_days_ntdun_call_in_times'
,
dhb_last_30_and_60_days_ntdun_call_duration_below15,
'dhb_last_30_and_60_days_ntdun_call_out_duration'
,
dhb_last_30_and_60_days_ntdun_call_duration_between15_and_30,
'dhb_last_30_and_60_days_ntdun_call_out_times'
,
dhb_last_30_and_60_days_ntdun_call_in_duration,
'dhb_last_30_and_60_days_ntdun_call_tel_total_nums'
,
dhb_last_30_and_60_days_ntdun_call_in_times,
'dhb_last_30_and_60_days_ntdun_call_total_duration'
,
dhb_last_30_and_60_days_ntdun_call_out_duration,
'dhb_last_30_and_60_days_ntdun_call_total_times'
,
dhb_last_30_and_60_days_ntdun_call_out_times,
'dhb_last_30_days_dun_call_avg_duration'
,
dhb_last_30_and_60_days_ntdun_call_tel_total_nums,
'dhb_last_30_days_dun_call_duration_above60'
,
dhb_last_30_and_60_days_ntdun_call_total_duration,
'dhb_last_30_days_dun_call_duration_below15'
,
dhb_last_30_and_60_days_ntdun_call_total_times,
'dhb_last_30_days_dun_call_duration_between15_and_30'
,
dhb_last_30_days_dun_call_avg_duration,
'dhb_last_30_days_dun_call_in_duration'
,
dhb_last_30_days_dun_call_duration_above60,
'dhb_last_30_days_dun_call_in_times'
,
dhb_last_30_days_dun_call_duration_below15,
'dhb_last_30_days_dun_call_out_duration'
,
dhb_last_30_days_dun_call_duration_between15_and_30,
'dhb_last_30_days_dun_call_out_times'
,
dhb_last_30_days_dun_call_in_duration,
'dhb_last_30_days_dun_call_tel_total_nums'
,
dhb_last_30_days_dun_call_in_times,
'dhb_last_30_days_dun_call_total_duration'
,
dhb_last_30_days_dun_call_out_duration,
'dhb_last_30_days_dun_call_total_times'
,
dhb_last_30_days_dun_call_out_times,
'dhb_last_30_days_ntdun_call_avg_duration'
,
dhb_last_30_days_dun_call_tel_total_nums,
'dhb_last_30_days_ntdun_call_duration_above60'
,
dhb_last_30_days_dun_call_total_duration,
'dhb_last_30_days_ntdun_call_duration_below15'
,
dhb_last_30_days_dun_call_total_times,
'dhb_last_30_days_ntdun_call_duration_between15_and_30'
,
dhb_last_30_days_ntdun_call_avg_duration,
'dhb_last_30_days_ntdun_call_in_duration'
,
dhb_last_30_days_ntdun_call_duration_above60,
'dhb_last_30_days_ntdun_call_in_times'
,
dhb_last_30_days_ntdun_call_duration_below15,
'dhb_last_30_days_ntdun_call_out_duration'
,
dhb_last_30_days_ntdun_call_duration_between15_and_30,
'dhb_last_30_days_ntdun_call_out_times'
,
dhb_last_30_days_ntdun_call_in_duration,
'dhb_last_30_days_ntdun_call_tel_total_nums'
,
dhb_last_30_days_ntdun_call_in_times,
'dhb_last_30_days_ntdun_call_total_duration'
,
dhb_last_30_days_ntdun_call_out_duration,
'dhb_last_30_days_ntdun_call_total_times'
,
dhb_last_30_days_ntdun_call_out_times,
'dhb_last_60_and_90_days_dun_call_avg_duration'
,
dhb_last_30_days_ntdun_call_tel_total_nums,
'dhb_last_60_and_90_days_dun_call_duration_above60'
,
dhb_last_30_days_ntdun_call_total_duration,
'dhb_last_60_and_90_days_dun_call_duration_below15'
,
dhb_last_30_days_ntdun_call_total_times,
'dhb_last_60_and_90_days_dun_call_duration_between15_and_30'
,
dhb_last_60_and_90_days_dun_call_avg_duration,
'dhb_last_60_and_90_days_dun_call_in_duration'
,
dhb_last_60_and_90_days_dun_call_duration_above60,
'dhb_last_60_and_90_days_dun_call_in_times'
,
dhb_last_60_and_90_days_dun_call_duration_below15,
'dhb_last_60_and_90_days_dun_call_out_duration'
,
dhb_last_60_and_90_days_dun_call_duration_between15_and_30,
'dhb_last_60_and_90_days_dun_call_out_times'
,
dhb_last_60_and_90_days_dun_call_in_duration,
'dhb_last_60_and_90_days_dun_call_tel_total_nums'
,
dhb_last_60_and_90_days_dun_call_in_times,
'dhb_last_60_and_90_days_dun_call_total_duration'
,
dhb_last_60_and_90_days_dun_call_out_duration,
'dhb_last_60_and_90_days_dun_call_total_times'
,
dhb_last_60_and_90_days_dun_call_out_times,
'dhb_last_60_and_90_days_ntdun_call_avg_duration'
,
dhb_last_60_and_90_days_dun_call_tel_total_nums,
'dhb_last_60_and_90_days_ntdun_call_duration_above60'
,
dhb_last_60_and_90_days_dun_call_total_duration,
'dhb_last_60_and_90_days_ntdun_call_duration_below15'
,
dhb_last_60_and_90_days_dun_call_total_times,
'dhb_last_60_and_90_days_ntdun_call_duration_between15_and_30'
,
dhb_last_60_and_90_days_ntdun_call_avg_duration,
'dhb_last_60_and_90_days_ntdun_call_in_duration'
,
dhb_last_60_and_90_days_ntdun_call_duration_above60,
'dhb_last_60_and_90_days_ntdun_call_in_times'
,
dhb_last_60_and_90_days_ntdun_call_duration_below15,
'dhb_last_60_and_90_days_ntdun_call_out_duration'
,
dhb_last_60_and_90_days_ntdun_call_duration_between15_and_30,
'dhb_last_60_and_90_days_ntdun_call_out_times'
,
dhb_last_60_and_90_days_ntdun_call_in_duration,
'dhb_last_60_and_90_days_ntdun_call_tel_total_nums'
,
dhb_last_60_and_90_days_ntdun_call_in_times,
'dhb_last_60_and_90_days_ntdun_call_total_duration'
,
dhb_last_60_and_90_days_ntdun_call_out_duration,
'dhb_last_60_and_90_days_ntdun_call_total_times'
,
dhb_last_60_and_90_days_ntdun_call_out_times,
'dhb_last_three_weeks_dun_call_avg_duration'
,
dhb_last_60_and_90_days_ntdun_call_tel_total_nums,
'dhb_last_three_weeks_dun_call_duration_above60'
,
dhb_last_60_and_90_days_ntdun_call_total_duration,
'dhb_last_three_weeks_dun_call_duration_below15'
,
dhb_last_60_and_90_days_ntdun_call_total_times,
'dhb_last_three_weeks_dun_call_duration_between15_and_30'
,
dhb_last_three_weeks_dun_call_avg_duration,
'dhb_last_three_weeks_dun_call_in_duration'
,
dhb_last_three_weeks_dun_call_duration_above60,
'dhb_last_three_weeks_dun_call_in_times'
,
dhb_last_three_weeks_dun_call_duration_below15,
'dhb_last_three_weeks_dun_call_out_duration'
,
dhb_last_three_weeks_dun_call_duration_between15_and_30,
'dhb_last_three_weeks_dun_call_out_times'
,
dhb_last_three_weeks_dun_call_in_duration,
'dhb_last_three_weeks_dun_call_tel_total_nums'
,
dhb_last_three_weeks_dun_call_in_times,
'dhb_last_three_weeks_dun_call_total_duration'
,
dhb_last_three_weeks_dun_call_out_duration,
'dhb_last_three_weeks_dun_call_total_times'
,
dhb_last_three_weeks_dun_call_out_times,
'dhb_last_three_weeks_ntdun_call_avg_duration'
,
dhb_last_three_weeks_dun_call_tel_total_nums,
'dhb_last_three_weeks_ntdun_call_duration_above60'
,
dhb_last_three_weeks_dun_call_total_duration,
'dhb_last_three_weeks_ntdun_call_duration_below15'
,
dhb_last_three_weeks_dun_call_total_times,
'dhb_last_three_weeks_ntdun_call_duration_between15_and_30'
,
dhb_last_three_weeks_ntdun_call_avg_duration,
'dhb_last_three_weeks_ntdun_call_in_duration'
,
dhb_last_three_weeks_ntdun_call_duration_above60,
'dhb_last_three_weeks_ntdun_call_in_times'
,
dhb_last_three_weeks_ntdun_call_duration_below15,
'dhb_last_three_weeks_ntdun_call_out_duration'
,
dhb_last_three_weeks_ntdun_call_duration_between15_and_30,
'dhb_last_three_weeks_ntdun_call_out_times'
,
dhb_last_three_weeks_ntdun_call_in_duration,
'dhb_last_three_weeks_ntdun_call_tel_total_nums'
,
dhb_last_three_weeks_ntdun_call_in_times,
'dhb_last_three_weeks_ntdun_call_total_duration'
,
dhb_last_three_weeks_ntdun_call_out_duration,
'dhb_last_three_weeks_ntdun_call_total_times'
,
dhb_last_three_weeks_ntdun_call_out_times,
'dhb_last_two_weeks_dun_call_avg_duration'
,
dhb_last_three_weeks_ntdun_call_tel_total_nums,
'dhb_last_two_weeks_dun_call_duration_above60'
,
dhb_last_three_weeks_ntdun_call_total_duration,
'dhb_last_two_weeks_dun_call_duration_below15'
,
dhb_last_three_weeks_ntdun_call_total_times,
'dhb_last_two_weeks_dun_call_duration_between15_and_30'
,
dhb_last_two_weeks_dun_call_avg_duration,
'dhb_last_two_weeks_dun_call_in_duration'
,
dhb_last_two_weeks_dun_call_duration_above60,
'dhb_last_two_weeks_dun_call_in_times'
,
dhb_last_two_weeks_dun_call_duration_below15,
'dhb_last_two_weeks_dun_call_out_duration'
,
dhb_last_two_weeks_dun_call_duration_between15_and_30,
'dhb_last_two_weeks_dun_call_out_times'
,
dhb_last_two_weeks_dun_call_in_duration,
'dhb_last_two_weeks_dun_call_tel_total_nums'
,
dhb_last_two_weeks_dun_call_in_times,
'dhb_last_two_weeks_dun_call_total_duration'
,
dhb_last_two_weeks_dun_call_out_duration,
'dhb_last_two_weeks_dun_call_total_times'
,
dhb_last_two_weeks_dun_call_out_times,
'dhb_last_two_weeks_ntdun_call_avg_duration'
,
dhb_last_two_weeks_dun_call_tel_total_nums,
'dhb_last_two_weeks_ntdun_call_duration_above60'
,
dhb_last_two_weeks_dun_call_total_duration,
'dhb_last_two_weeks_ntdun_call_duration_below15'
,
dhb_last_two_weeks_dun_call_total_times,
'dhb_last_two_weeks_ntdun_call_duration_between15_and_30'
,
dhb_last_two_weeks_ntdun_call_avg_duration,
'dhb_last_two_weeks_ntdun_call_in_duration'
,
dhb_last_two_weeks_ntdun_call_duration_above60,
'dhb_last_two_weeks_ntdun_call_in_times'
,
dhb_last_two_weeks_ntdun_call_duration_below15,
'dhb_last_two_weeks_ntdun_call_out_duration'
,
dhb_last_two_weeks_ntdun_call_duration_between15_and_30,
'dhb_last_two_weeks_ntdun_call_out_times'
,
dhb_last_two_weeks_ntdun_call_in_duration,
'dhb_last_two_weeks_ntdun_call_tel_total_nums'
,
dhb_last_two_weeks_ntdun_call_in_times,
'dhb_last_two_weeks_ntdun_call_total_duration'
,
dhb_last_two_weeks_ntdun_call_out_duration,
'dhb_last_two_weeks_ntdun_call_total_times'
,
dhb_last_two_weeks_ntdun_call_out_times,
'dhb_last_week_dun_call_avg_duration'
,
dhb_last_two_weeks_ntdun_call_tel_total_nums,
'dhb_last_week_dun_call_duration_above60'
,
dhb_last_two_weeks_ntdun_call_total_duration,
'dhb_last_week_dun_call_duration_below15'
,
dhb_last_two_weeks_ntdun_call_total_times,
'dhb_last_week_dun_call_duration_between15_and_30'
,
dhb_last_week_dun_call_avg_duration,
'dhb_last_week_dun_call_in_duration'
,
dhb_last_week_dun_call_duration_above60,
'dhb_last_week_dun_call_in_times'
,
dhb_last_week_dun_call_duration_below15,
'dhb_last_week_dun_call_out_duration'
,
dhb_last_week_dun_call_duration_between15_and_30,
'dhb_last_week_dun_call_out_times'
,
dhb_last_week_dun_call_in_duration, dhb_last_week_dun_call_in_times,
'dhb_last_week_dun_call_tel_total_nums'
,
dhb_last_week_dun_call_out_duration,
'dhb_last_week_dun_call_total_duration'
,
dhb_last_week_dun_call_out_times,
'dhb_last_week_dun_call_total_times'
,
dhb_last_week_dun_call_tel_total_nums,
'dhb_last_week_ntdun_call_avg_duration'
,
dhb_last_week_dun_call_total_duration,
'dhb_last_week_ntdun_call_duration_above60'
,
dhb_last_week_dun_call_total_times,
'dhb_last_week_ntdun_call_duration_below15'
,
dhb_last_week_ntdun_call_avg_duration,
'dhb_last_week_ntdun_call_duration_between15_and_30'
,
dhb_last_week_ntdun_call_duration_above60,
'dhb_last_week_ntdun_call_in_duration'
,
dhb_last_week_ntdun_call_duration_below15,
'dhb_last_week_ntdun_call_in_times'
,
dhb_last_week_ntdun_call_duration_between15_and_30,
'dhb_last_week_ntdun_call_out_duration'
,
dhb_last_week_ntdun_call_in_duration,
'dhb_last_week_ntdun_call_out_times'
,
dhb_last_week_ntdun_call_in_times,
'dhb_last_week_ntdun_call_tel_total_nums'
,
dhb_last_week_ntdun_call_out_duration,
'dhb_last_week_ntdun_call_total_duration'
,
dhb_last_week_ntdun_call_out_times,
'dhb_last_week_ntdun_call_total_times'
,
dhb_last_week_ntdun_call_tel_total_nums,
'dhb_overview_dun_call_avg_duration'
,
dhb_last_week_ntdun_call_total_duration,
'dhb_overview_dun_call_duration_above60'
,
dhb_last_week_ntdun_call_total_times,
'dhb_overview_dun_call_duration_below15'
,
dhb_overview_dun_call_avg_duration,
'dhb_overview_dun_call_duration_between15_and_30'
,
dhb_overview_dun_call_duration_above60,
'dhb_overview_dun_call_in_duration'
,
dhb_overview_dun_call_duration_below15,
'dhb_overview_dun_call_in_times'
,
dhb_overview_dun_call_duration_between15_and_30,
'dhb_overview_dun_call_out_duration'
,
dhb_overview_dun_call_in_duration, dhb_overview_dun_call_in_times,
'dhb_overview_dun_call_out_times'
,
dhb_overview_dun_call_out_duration, dhb_overview_dun_call_out_times,
'dhb_overview_dun_call_tel_total_nums'
,
dhb_overview_dun_call_tel_total_nums,
'dhb_overview_dun_call_total_duration'
,
dhb_overview_dun_call_total_duration,
'dhb_overview_dun_call_total_times'
,
dhb_overview_dun_call_total_times, dhb_overview_dun_first_call_time,
'dhb_overview_dun_first_call_time'
,
dhb_overview_dun_last_call_time,
'dhb_overview_dun_last_call_time'
,
dhb_overview_ntdun_call_avg_duration,
'dhb_overview_ntdun_call_avg_duration'
,
dhb_overview_ntdun_call_duration_above60,
'dhb_overview_ntdun_call_duration_above60'
,
dhb_overview_ntdun_call_duration_below15,
'dhb_overview_ntdun_call_duration_below15'
,
dhb_overview_ntdun_call_duration_between15_and_30,
'dhb_overview_ntdun_call_duration_between15_and_30'
,
dhb_overview_ntdun_call_in_duration,
'dhb_overview_ntdun_call_in_duration'
,
dhb_overview_ntdun_call_in_times,
'dhb_overview_ntdun_call_in_times'
,
dhb_overview_ntdun_call_out_duration,
'dhb_overview_ntdun_call_out_duration'
,
dhb_overview_ntdun_call_out_times,
'dhb_overview_ntdun_call_out_times'
,
dhb_overview_ntdun_call_tel_total_nums,
'dhb_overview_ntdun_call_tel_total_nums'
,
dhb_overview_ntdun_call_total_duration,
'dhb_overview_ntdun_call_total_duration'
,
dhb_overview_ntdun_call_total_times,
'dhb_overview_ntdun_call_total_times'
,
dhb_overview_ntdun_first_call_time,
'dhb_overview_ntdun_first_call_time'
]
dhb_overview_ntdun_last_call_time,applied_at,applied_from,applied_type,if(passdue_day>15,1,0) as target
from risk_analysis
where applied_at >= '@start_time_period' and applied_at < '@end_time_period'
and transacted = 1
and dhb_flag =1
and datediff(now(),deadline) > 15
'''
def
__init__
(
self
,
overdue_days
=
15
,
features
=
None
,
sql
=
None
,
start_time_period
=
None
,
end_time_period
=
None
):
# sql = '''
#
# '''
start_time_period
=
(
datetime
.
date
.
today
()
-
relativedelta
(
months
=+
7
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
end_time_period
=
(
datetime
.
date
.
today
()
-
relativedelta
(
days
=+
17
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
def
__init__
(
self
,
features
=
None
,
sql
=
None
,
start_time_period
=
None
,
end_time_period
=
None
):
try
:
try
:
if
features
!=
None
:
# if the para was not Series
self
.
features
=
features
if
(
type
(
features
)
!=
pd
.
core
.
series
.
Series
):
if
sql
!=
None
:
self
.
features
=
pd
.
Series
(
features
)
self
.
sql
=
sql
else
:
sql
=
"select "
+
str
(
features
)
.
strip
(
'['
)
.
strip
(
']'
)
+
''',if(passdue_day>'''
+
str
(
overdue_days
)
+
''',1,0) as target, applied_at, applied_from, applied_type
from risk_analysis
where applied_at >= '@start_time_period' and applied_at < '@end_time_period'
and transacted = 1
and dhb_flag =1
and datediff(now(),deadline) > '''
+
str
(
overdue_days
)
+
'''
'''
if
start_time_period
!=
None
:
self
.
start_time_period
=
start_time_period
else
:
else
:
self
.
start_time_period
=
(
datetime
.
date
.
today
()
-
dateutil
.
relativedelta
(
months
=+
7
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
self
.
features
=
features
if
end_time_period
!=
None
:
self
.
end_time_period
=
end_time_period
else
:
self
.
end_time_period
=
(
datetime
.
date
.
today
()
-
dateutil
.
relativedelta
(
days
=+
16
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
'Parameters Error:
\n
'
,
e
)
print
(
"'features' parameter type Error, it should be list or Series"
)
raise
if
sql
!=
None
:
self
.
sql
=
sql
if
start_time_period
!=
None
:
self
.
start_time_period
=
start_time_period
if
end_time_period
!=
None
:
self
.
end_time_period
=
end_time_period
def
dhb_features_extract
(
self
):
def
dhb_features_extract
(
self
):
'''
instrucions : extract dhb features from risk_analysis
:param self:
:return: dhb features
'''
value_map
=
{
value_map
=
{
"近3天"
:
1
,
"近3天"
:
1
,
"近4-5天"
:
2
,
"近4-5天"
:
2
,
"近6-7天"
:
3
,
"近6-7天"
:
3
,
"近8-15天"
:
4
,
"近8-15天"
:
4
,
"近16-30天"
:
5
,
"近16-30天"
:
5
,
"近31-60天"
:
6
,
"近31-60天"
:
6
,
"近61-90天"
:
7
,
"近61-90天"
:
7
,
"近91-120天"
:
8
,
"近91-120天"
:
8
,
"近121-150天"
:
9
,
"近121-150天"
:
9
,
"近151-180天"
:
10
,
"近151-180天"
:
10
,
"180天前"
:
11
,
"180天前"
:
11
,
"无"
:
0
"无"
:
0
}
}
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
# use risk_analysis to extract data
# use risk_analysis to extract data
sql
=
self
.
sql
.
replace
(
'@start_time_period'
,
self
.
start_time_period
)
.
replace
(
'@end_time_period'
,
self
.
end_time_period
)
dhb_loan
=
pd
.
read_sql
(
self
.
sql
.
replace
(
'@start_time_period'
,
self
.
start_time_period
)
.
replace
(
'@end_time_period'
,
self
.
end_time_period
),
mysqldb
.
engine_risk_analysis
)
#
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
=
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
.
applymap
(
lambda
x
:
value_map
[
x
])
dhb_loan
=
pd
.
read_sql
(
sql
,
mysqldb
.
engine_risk_analysis
)
# dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
# manipul category datatype which includes sequences
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
=
pd
.
get_dummies
(
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]],
columns
=
[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
])
# limit the upper boundary
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_60_and_90_days_ntdun_call_avg_duration
>=
42
,
"dhb_last_60_and_90_days_ntdun_call_avg_duration"
]
=
42
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_60_and_90_days_ntdun_call_avg_duration
>=
42
,
"dhb_last_60_and_90_days_ntdun_call_avg_duration"
]
=
42
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_ntdun_call_duration_above60
>=
25
,
"dhb_overview_ntdun_call_duration_above60"
]
=
25
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_ntdun_call_duration_above60
>=
25
,
"dhb_overview_ntdun_call_duration_above60"
]
=
25
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_30_and_60_days_ntdun_call_total_duration
>=
800
,
"dhb_last_30_and_60_days_ntdun_call_total_duration"
]
=
800
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_30_and_60_days_ntdun_call_total_duration
>=
800
,
"dhb_last_30_and_60_days_ntdun_call_total_duration"
]
=
800
...
@@ -249,14 +252,41 @@ class dhb:
...
@@ -249,14 +252,41 @@ class dhb:
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_dun_call_tel_total_nums
>=
22
,
"dhb_overview_dun_call_tel_total_nums"
]
=
22
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_dun_call_tel_total_nums
>=
22
,
"dhb_overview_dun_call_tel_total_nums"
]
=
22
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_30_days_dun_call_total_duration
>=
1100
,
"dhb_last_30_days_dun_call_total_duration"
]
=
1100
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_30_days_dun_call_total_duration
>=
1100
,
"dhb_last_30_days_dun_call_total_duration"
]
=
1100
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_two_weeks_ntdun_call_in_duration
>=
300
,
"dhb_last_two_weeks_ntdun_call_in_duration"
]
=
300
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_two_weeks_ntdun_call_in_duration
>=
300
,
"dhb_last_two_weeks_ntdun_call_in_duration"
]
=
300
# dhb_loan.to_csv("./dhb_loan_sample——"+str(datetime.date.today())+".csv")
print
(
datetime
.
time
.
strftime
(
'
%
Y.
%
m.
%
d
%
H:
%
M:
%
S'
,
datetime
.
time
.
localtime
(
datetime
.
time
.
time
()))
+
"提取了dhb {}+ "
.
format
(
str
(
self
.
overdue_days
))
+
self
.
start_time_period
+
"to"
+
self
.
end_time_period
+
"时段样本"
)
dhb_loan
.
to_csv
(
"./dhb_loan_sample——"
+
str
(
datetime
.
date
.
today
())
+
".csv"
)
print
(
time
.
strftime
(
'
%
Y.
%
m.
%
d
%
H:
%
M:
%
S'
,
time
.
localtime
(
time
.
time
()))
+
"提取了dhb "
+
self
.
start_time_period
+
"to"
+
self
.
end_time_period
+
"时段样本"
)
return
dhb_loan
return
dhb_loan
def
dhb_comparasion
(
self
,
limit
=
"{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
,
df
=
None
,
applied_type
=
None
,
applied_from
=
None
):
'''
df_mongo
=
mongodb
.
pymongodb
(
self
.
start_time_period
,
self
.
end_time_period
,
limit
,
"{'order_id':1,'model_exec_data_source#dhb':1}"
)
instructions : build a comparasion
Params :
df - test dataset which was given
score - score column
target - label
start_time_period -
end_time_period -
applied_tpye -
applied_from -
Returns :
auc comparasion
liftchart plot
'''
# def dhb_comparasion(df,score = 'model_exec_data_source#dhb' ,target = 'target', start_time_period = self.start_time_period, end_time_period = self.end_time_period, applied_type = None, applied_from = None):
# df_mongo = mongodb.pymongodb(start_time_period, end_time_period, limit, "{'order_id':1,'model_exec_data_source#dhb':1}")
# df = pd.merge(df,df_mongo,how='left',left_on='order_no',right_on='order_id')
# df['bins'] = df.qcut(df['target'], q = 10, percision = 6, dupulicates='drop')
# df.groupby
# return 1
mvp/lgbreport.py
View file @
45721de0
...
@@ -6,7 +6,6 @@ from data.analyis import datacal
...
@@ -6,7 +6,6 @@ from data.analyis import datacal
from
models
import
xgboost
from
models
import
xgboost
from
matplotlib
import
pyplot
as
plt
from
matplotlib
import
pyplot
as
plt
from
data.graph
import
drawplot
from
data.graph
import
drawplot
import
dhb
from
mvp
import
dhb
from
data.datasource
import
mysqldb
,
mongodb
dhb
=
dhb
()
df_dhb
=
dhb
.
dhb_features_extract
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment