Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
45721de0
Commit
45721de0
authored
Apr 22, 2019
by
王家华
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
debug
parent
b5a3f366
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
293 additions
and
214 deletions
+293
-214
__init__.cpython-37.pyc
models/__pycache__/__init__.cpython-37.pyc
+0
-0
xgboost.cpython-37.pyc
models/__pycache__/xgboost.cpython-37.pyc
+0
-0
lightgbm.py
models/lightgbm.py
+51
-1
__init__.cpython-37.pyc
mvp/__pycache__/__init__.cpython-37.pyc
+0
-0
dhb.cpython-37.pyc
mvp/__pycache__/dhb.cpython-37.pyc
+0
-0
xgbreport.cpython-37.pyc
mvp/__pycache__/xgbreport.cpython-37.pyc
+0
-0
dhb.py
mvp/dhb.py
+240
-210
lgbreport.py
mvp/lgbreport.py
+2
-3
No files found.
models/__pycache__/__init__.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
models/__pycache__/xgboost.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
models/lightgbm.py
View file @
45721de0
import
lightgbm
as
lgb
from
sklearn.metrics
import
roc_auc_score
from
sklearn.model_selection
import
GridSearchCV
from
sklearn.metrics
import
confusion_matrix
,
mean_squared_error
import
numpy
import
pandas
...
...
@@ -21,8 +22,9 @@ params = {
'verbose'
:
1
# <0 显示致命的, =0 显示错误 (警告), >0 显示信息
}
'''
'''
instructions : training lightgbm model with specified params
Parameters :
...
...
@@ -33,5 +35,53 @@ Parameters :
'''
def lgb_train(params, training_set, features, target):
    '''
    instructions : wrap the selected columns of training_set in a lgb.Dataset.
                   No booster is trained yet; the function is a placeholder.
    Parameters :
        params - training parameters (currently unused)
        training_set - object indexed with [features] and [target]
                       (presumably a pandas DataFrame - TODO confirm)
        features - selector for the input columns
        target - selector for the label column
    Returns :
        always 1
    '''
    feature_columns = training_set[features]
    label_column = training_set[target]
    # Built but not used further until the lgb.train call is wired in.
    train_data = lgb.Dataset(feature_columns, label_column)
    # TODO: lgb.train(params, ...)
    return 1
'''
instructions : build a lgb classifier
Params :
'''
def buildClf(params):
    '''
    instructions : build a lgb classifier
    Params :
        params - dict of LGBMClassifier keyword arguments; any non-dict value
                 is forwarded positionally, exactly as before
    Returns :
        an unfitted lgb.LGBMClassifier
    '''
    # A dict passed positionally is bound to the constructor's first
    # parameter (boosting_type) instead of being applied as settings,
    # so unpack it into keyword arguments.
    if isinstance(params, dict):
        return lgb.LGBMClassifier(**params)
    return lgb.LGBMClassifier(params)
'''
'''
def automodelfit(clf, param_grid, dftrain, features, resp, kfold=10, scoring='roc_auc'):
    '''
    instructions : exhaustive grid search over param_grid for clf, scored by
                   cross validation
    Params :
        clf - estimator to tune
        param_grid - parameter grid handed to GridSearchCV
        dftrain - training set, indexed with [features] and [resp]
        features - feature columns
        resp - label column
        kfold - value passed as GridSearchCV's cv argument
        scoring - value passed as GridSearchCV's scoring argument
    Returns :
        the fitted GridSearchCV object (refit=True, so it also holds the
        estimator refitted with the best parameters)
    '''
    import inspect

    search_kwargs = dict(scoring=scoring, n_jobs=2, cv=kfold, verbose=2, refit=True)
    # `iid` has been removed from GridSearchCV in newer scikit-learn releases,
    # where passing it raises TypeError. Forward it only while the installed
    # version still accepts it, so older installs keep their behaviour.
    if 'iid' in inspect.signature(GridSearchCV.__init__).parameters:
        search_kwargs['iid'] = True
    grid_search = GridSearchCV(clf, param_grid, **search_kwargs)
    # == model training
    grid_search.fit(dftrain[features], dftrain[resp])
    # == the best parameters are read from the returned search object
    return grid_search
def modelfit(clf, dftrain, features, resp, useTrainCV=True, kfold=10, eval_metric='auc', early_stopping_rounds=20):
    '''
    Model training: fit clf, optionally tuning n_estimators with lgb.cv first.

    :param clf: LGBMClassifier (needs get_params / set_params / fit)
    :param dftrain: training set
    :param features: feature columns
    :param resp: label column
    :param useTrainCV: if True, run cross validation first in order to tune n_estimators
    :param kfold: number of cross-validation folds
    :param eval_metric: evaluation metric, used by lgb.cv and forwarded to clf.fit
    :param early_stopping_rounds: stop the cv run once the metric has not
        improved for this many consecutive rounds
    :return: the fitted clf
    '''
    if useTrainCV:
        # LGBMClassifier has no get_xgb_params(); start from get_params() and
        # drop unset (None) entries so they are not handed to the native API.
        cv_params = {key: value for key, value in clf.get_params().items() if value is not None}
        # n_estimators is the upper bound on boosting rounds, not a cv parameter.
        num_boost_round = cv_params.pop('n_estimators')
        # These belong to the sklearn wrapper, not to the native trainer.
        for wrapper_only in ('silent', 'importance_type', 'class_weight'):
            cv_params.pop(wrapper_only, None)
        # NOTE(review): assumes a binary task when clf has no explicit
        # objective (the default metric 'auc' is a binary metric) - confirm.
        cv_params.setdefault('objective', 'binary')

        # LightGBM's dataset container is lgb.Dataset (there is no lgb.DMatrix).
        train_set = lgb.Dataset(dftrain[features].values, label=dftrain[resp].values)
        # Early stopping goes through the callback, which works across LightGBM
        # releases; the early_stopping_rounds / verbose_eval keywords are not
        # accepted by newer ones.
        cvresult = lgb.cv(
            cv_params,
            train_set,
            num_boost_round=num_boost_round,
            nfold=kfold,
            metrics=eval_metric,
            callbacks=[lgb.early_stopping(early_stopping_rounds)],
        )
        # lgb.cv returns a dict mapping metric names to per-round value lists
        # (not a DataFrame); the list length is the number of rounds kept.
        if cvresult:
            best_rounds = len(next(iter(cvresult.values())))
            clf.set_params(n_estimators=best_rounds)

    clf.fit(dftrain[features], dftrain[resp], eval_metric=eval_metric)
    return clf
mvp/__pycache__/__init__.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
mvp/__pycache__/dhb.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
mvp/__pycache__/xgbreport.cpython-37.pyc
0 → 100644
View file @
45721de0
File added
mvp/dhb.py
View file @
45721de0
import
pandas
as
pd
from
data.datasource
import
mysqldb
,
mongodb
import
time
from
dateutil.relativedelta
import
relativedelta
import
datetime
import
dateutil
'''
model instructions : establishes a dhb object which contains the attributes of the dhb model
...
...
@@ -20,222 +22,223 @@ API :
class
dhb
:
# features as Series format
features
=
[
'dhb_last_30_and_60_days_dun_call_avg_duration'
,
'dhb_last_30_and_60_days_dun_call_duration_above60'
,
'dhb_last_30_and_60_days_dun_call_duration_below15'
,
'dhb_last_30_and_60_days_dun_call_duration_between15_and_30'
,
'dhb_last_30_and_60_days_dun_call_in_duration'
,
'dhb_last_30_and_60_days_dun_call_in_times'
,
'dhb_last_30_and_60_days_dun_call_out_duration'
,
'dhb_last_30_and_60_days_dun_call_out_times'
,
'dhb_last_30_and_60_days_dun_call_tel_total_nums'
,
'dhb_last_30_and_60_days_dun_call_total_duration'
,
'dhb_last_30_and_60_days_dun_call_total_times'
,
'dhb_last_30_and_60_days_ntdun_call_avg_duration'
,
'dhb_last_30_and_60_days_ntdun_call_duration_above60'
,
'dhb_last_30_and_60_days_ntdun_call_duration_below15'
,
'dhb_last_30_and_60_days_ntdun_call_duration_between15_and_30'
,
'dhb_last_30_and_60_days_ntdun_call_in_duration'
,
'dhb_last_30_and_60_days_ntdun_call_in_times'
,
'dhb_last_30_and_60_days_ntdun_call_out_duration'
,
'dhb_last_30_and_60_days_ntdun_call_out_times'
,
'dhb_last_30_and_60_days_ntdun_call_tel_total_nums'
,
'dhb_last_30_and_60_days_ntdun_call_total_duration'
,
'dhb_last_30_and_60_days_ntdun_call_total_times'
,
'dhb_last_30_days_dun_call_avg_duration'
,
'dhb_last_30_days_dun_call_duration_above60'
,
'dhb_last_30_days_dun_call_duration_below15'
,
'dhb_last_30_days_dun_call_duration_between15_and_30'
,
'dhb_last_30_days_dun_call_in_duration'
,
'dhb_last_30_days_dun_call_in_times'
,
'dhb_last_30_days_dun_call_out_duration'
,
'dhb_last_30_days_dun_call_out_times'
,
'dhb_last_30_days_dun_call_tel_total_nums'
,
'dhb_last_30_days_dun_call_total_duration'
,
'dhb_last_30_days_dun_call_total_times'
,
'dhb_last_30_days_ntdun_call_avg_duration'
,
'dhb_last_30_days_ntdun_call_duration_above60'
,
'dhb_last_30_days_ntdun_call_duration_below15'
,
'dhb_last_30_days_ntdun_call_duration_between15_and_30'
,
'dhb_last_30_days_ntdun_call_in_duration'
,
'dhb_last_30_days_ntdun_call_in_times'
,
'dhb_last_30_days_ntdun_call_out_duration'
,
'dhb_last_30_days_ntdun_call_out_times'
,
'dhb_last_30_days_ntdun_call_tel_total_nums'
,
'dhb_last_30_days_ntdun_call_total_duration'
,
'dhb_last_30_days_ntdun_call_total_times'
,
'dhb_last_60_and_90_days_dun_call_avg_duration'
,
'dhb_last_60_and_90_days_dun_call_duration_above60'
,
'dhb_last_60_and_90_days_dun_call_duration_below15'
,
'dhb_last_60_and_90_days_dun_call_duration_between15_and_30'
,
'dhb_last_60_and_90_days_dun_call_in_duration'
,
'dhb_last_60_and_90_days_dun_call_in_times'
,
'dhb_last_60_and_90_days_dun_call_out_duration'
,
'dhb_last_60_and_90_days_dun_call_out_times'
,
'dhb_last_60_and_90_days_dun_call_tel_total_nums'
,
'dhb_last_60_and_90_days_dun_call_total_duration'
,
'dhb_last_60_and_90_days_dun_call_total_times'
,
'dhb_last_60_and_90_days_ntdun_call_avg_duration'
,
'dhb_last_60_and_90_days_ntdun_call_duration_above60'
,
'dhb_last_60_and_90_days_ntdun_call_duration_below15'
,
'dhb_last_60_and_90_days_ntdun_call_duration_between15_and_30'
,
'dhb_last_60_and_90_days_ntdun_call_in_duration'
,
'dhb_last_60_and_90_days_ntdun_call_in_times'
,
'dhb_last_60_and_90_days_ntdun_call_out_duration'
,
'dhb_last_60_and_90_days_ntdun_call_out_times'
,
'dhb_last_60_and_90_days_ntdun_call_tel_total_nums'
,
'dhb_last_60_and_90_days_ntdun_call_total_duration'
,
'dhb_last_60_and_90_days_ntdun_call_total_times'
,
'dhb_last_three_weeks_dun_call_avg_duration'
,
'dhb_last_three_weeks_dun_call_duration_above60'
,
'dhb_last_three_weeks_dun_call_duration_below15'
,
'dhb_last_three_weeks_dun_call_duration_between15_and_30'
,
'dhb_last_three_weeks_dun_call_in_duration'
,
'dhb_last_three_weeks_dun_call_in_times'
,
'dhb_last_three_weeks_dun_call_out_duration'
,
'dhb_last_three_weeks_dun_call_out_times'
,
'dhb_last_three_weeks_dun_call_tel_total_nums'
,
'dhb_last_three_weeks_dun_call_total_duration'
,
'dhb_last_three_weeks_dun_call_total_times'
,
'dhb_last_three_weeks_ntdun_call_avg_duration'
,
'dhb_last_three_weeks_ntdun_call_duration_above60'
,
'dhb_last_three_weeks_ntdun_call_duration_below15'
,
'dhb_last_three_weeks_ntdun_call_duration_between15_and_30'
,
'dhb_last_three_weeks_ntdun_call_in_duration'
,
'dhb_last_three_weeks_ntdun_call_in_times'
,
'dhb_last_three_weeks_ntdun_call_out_duration'
,
'dhb_last_three_weeks_ntdun_call_out_times'
,
'dhb_last_three_weeks_ntdun_call_tel_total_nums'
,
'dhb_last_three_weeks_ntdun_call_total_duration'
,
'dhb_last_three_weeks_ntdun_call_total_times'
,
'dhb_last_two_weeks_dun_call_avg_duration'
,
'dhb_last_two_weeks_dun_call_duration_above60'
,
'dhb_last_two_weeks_dun_call_duration_below15'
,
'dhb_last_two_weeks_dun_call_duration_between15_and_30'
,
'dhb_last_two_weeks_dun_call_in_duration'
,
'dhb_last_two_weeks_dun_call_in_times'
,
'dhb_last_two_weeks_dun_call_out_duration'
,
'dhb_last_two_weeks_dun_call_out_times'
,
'dhb_last_two_weeks_dun_call_tel_total_nums'
,
'dhb_last_two_weeks_dun_call_total_duration'
,
'dhb_last_two_weeks_dun_call_total_times'
,
'dhb_last_two_weeks_ntdun_call_avg_duration'
,
'dhb_last_two_weeks_ntdun_call_duration_above60'
,
'dhb_last_two_weeks_ntdun_call_duration_below15'
,
'dhb_last_two_weeks_ntdun_call_duration_between15_and_30'
,
'dhb_last_two_weeks_ntdun_call_in_duration'
,
'dhb_last_two_weeks_ntdun_call_in_times'
,
'dhb_last_two_weeks_ntdun_call_out_duration'
,
'dhb_last_two_weeks_ntdun_call_out_times'
,
'dhb_last_two_weeks_ntdun_call_tel_total_nums'
,
'dhb_last_two_weeks_ntdun_call_total_duration'
,
'dhb_last_two_weeks_ntdun_call_total_times'
,
'dhb_last_week_dun_call_avg_duration'
,
'dhb_last_week_dun_call_duration_above60'
,
'dhb_last_week_dun_call_duration_below15'
,
'dhb_last_week_dun_call_duration_between15_and_30'
,
'dhb_last_week_dun_call_in_duration'
,
'dhb_last_week_dun_call_in_times'
,
'dhb_last_week_dun_call_out_duration'
,
'dhb_last_week_dun_call_out_times'
,
'dhb_last_week_dun_call_tel_total_nums'
,
'dhb_last_week_dun_call_total_duration'
,
'dhb_last_week_dun_call_total_times'
,
'dhb_last_week_ntdun_call_avg_duration'
,
'dhb_last_week_ntdun_call_duration_above60'
,
'dhb_last_week_ntdun_call_duration_below15'
,
'dhb_last_week_ntdun_call_duration_between15_and_30'
,
'dhb_last_week_ntdun_call_in_duration'
,
'dhb_last_week_ntdun_call_in_times'
,
'dhb_last_week_ntdun_call_out_duration'
,
'dhb_last_week_ntdun_call_out_times'
,
'dhb_last_week_ntdun_call_tel_total_nums'
,
'dhb_last_week_ntdun_call_total_duration'
,
'dhb_last_week_ntdun_call_total_times'
,
'dhb_overview_dun_call_avg_duration'
,
'dhb_overview_dun_call_duration_above60'
,
'dhb_overview_dun_call_duration_below15'
,
'dhb_overview_dun_call_duration_between15_and_30'
,
'dhb_overview_dun_call_in_duration'
,
'dhb_overview_dun_call_in_times'
,
'dhb_overview_dun_call_out_duration'
,
'dhb_overview_dun_call_out_times'
,
'dhb_overview_dun_call_tel_total_nums'
,
'dhb_overview_dun_call_total_duration'
,
'dhb_overview_dun_call_total_times'
,
'dhb_overview_dun_first_call_time'
,
'dhb_overview_dun_last_call_time'
,
'dhb_overview_ntdun_call_avg_duration'
,
'dhb_overview_ntdun_call_duration_above60'
,
'dhb_overview_ntdun_call_duration_below15'
,
'dhb_overview_ntdun_call_duration_between15_and_30'
,
'dhb_overview_ntdun_call_in_duration'
,
'dhb_overview_ntdun_call_in_times'
,
'dhb_overview_ntdun_call_out_duration'
,
'dhb_overview_ntdun_call_out_times'
,
'dhb_overview_ntdun_call_tel_total_nums'
,
'dhb_overview_ntdun_call_total_duration'
,
'dhb_overview_ntdun_call_total_times'
,
'dhb_overview_ntdun_first_call_time'
]
#features = pd.read_excel()
sql
=
'''
select dhb_last_30_and_60_days_dun_call_avg_duration,
dhb_last_30_and_60_days_dun_call_duration_above60,
dhb_last_30_and_60_days_dun_call_duration_below15,
dhb_last_30_and_60_days_dun_call_duration_between15_and_30,
dhb_last_30_and_60_days_dun_call_in_duration,
dhb_last_30_and_60_days_dun_call_in_times,
dhb_last_30_and_60_days_dun_call_out_duration,
dhb_last_30_and_60_days_dun_call_out_times,
dhb_last_30_and_60_days_dun_call_tel_total_nums,
dhb_last_30_and_60_days_dun_call_total_duration,
dhb_last_30_and_60_days_dun_call_total_times,
dhb_last_30_and_60_days_ntdun_call_avg_duration,
dhb_last_30_and_60_days_ntdun_call_duration_above60,
dhb_last_30_and_60_days_ntdun_call_duration_below15,
dhb_last_30_and_60_days_ntdun_call_duration_between15_and_30,
dhb_last_30_and_60_days_ntdun_call_in_duration,
dhb_last_30_and_60_days_ntdun_call_in_times,
dhb_last_30_and_60_days_ntdun_call_out_duration,
dhb_last_30_and_60_days_ntdun_call_out_times,
dhb_last_30_and_60_days_ntdun_call_tel_total_nums,
dhb_last_30_and_60_days_ntdun_call_total_duration,
dhb_last_30_and_60_days_ntdun_call_total_times,
dhb_last_30_days_dun_call_avg_duration,
dhb_last_30_days_dun_call_duration_above60,
dhb_last_30_days_dun_call_duration_below15,
dhb_last_30_days_dun_call_duration_between15_and_30,
dhb_last_30_days_dun_call_in_duration,
dhb_last_30_days_dun_call_in_times,
dhb_last_30_days_dun_call_out_duration,
dhb_last_30_days_dun_call_out_times,
dhb_last_30_days_dun_call_tel_total_nums,
dhb_last_30_days_dun_call_total_duration,
dhb_last_30_days_dun_call_total_times,
dhb_last_30_days_ntdun_call_avg_duration,
dhb_last_30_days_ntdun_call_duration_above60,
dhb_last_30_days_ntdun_call_duration_below15,
dhb_last_30_days_ntdun_call_duration_between15_and_30,
dhb_last_30_days_ntdun_call_in_duration,
dhb_last_30_days_ntdun_call_in_times,
dhb_last_30_days_ntdun_call_out_duration,
dhb_last_30_days_ntdun_call_out_times,
dhb_last_30_days_ntdun_call_tel_total_nums,
dhb_last_30_days_ntdun_call_total_duration,
dhb_last_30_days_ntdun_call_total_times,
dhb_last_60_and_90_days_dun_call_avg_duration,
dhb_last_60_and_90_days_dun_call_duration_above60,
dhb_last_60_and_90_days_dun_call_duration_below15,
dhb_last_60_and_90_days_dun_call_duration_between15_and_30,
dhb_last_60_and_90_days_dun_call_in_duration,
dhb_last_60_and_90_days_dun_call_in_times,
dhb_last_60_and_90_days_dun_call_out_duration,
dhb_last_60_and_90_days_dun_call_out_times,
dhb_last_60_and_90_days_dun_call_tel_total_nums,
dhb_last_60_and_90_days_dun_call_total_duration,
dhb_last_60_and_90_days_dun_call_total_times,
dhb_last_60_and_90_days_ntdun_call_avg_duration,
dhb_last_60_and_90_days_ntdun_call_duration_above60,
dhb_last_60_and_90_days_ntdun_call_duration_below15,
dhb_last_60_and_90_days_ntdun_call_duration_between15_and_30,
dhb_last_60_and_90_days_ntdun_call_in_duration,
dhb_last_60_and_90_days_ntdun_call_in_times,
dhb_last_60_and_90_days_ntdun_call_out_duration,
dhb_last_60_and_90_days_ntdun_call_out_times,
dhb_last_60_and_90_days_ntdun_call_tel_total_nums,
dhb_last_60_and_90_days_ntdun_call_total_duration,
dhb_last_60_and_90_days_ntdun_call_total_times,
dhb_last_three_weeks_dun_call_avg_duration,
dhb_last_three_weeks_dun_call_duration_above60,
dhb_last_three_weeks_dun_call_duration_below15,
dhb_last_three_weeks_dun_call_duration_between15_and_30,
dhb_last_three_weeks_dun_call_in_duration,
dhb_last_three_weeks_dun_call_in_times,
dhb_last_three_weeks_dun_call_out_duration,
dhb_last_three_weeks_dun_call_out_times,
dhb_last_three_weeks_dun_call_tel_total_nums,
dhb_last_three_weeks_dun_call_total_duration,
dhb_last_three_weeks_dun_call_total_times,
dhb_last_three_weeks_ntdun_call_avg_duration,
dhb_last_three_weeks_ntdun_call_duration_above60,
dhb_last_three_weeks_ntdun_call_duration_below15,
dhb_last_three_weeks_ntdun_call_duration_between15_and_30,
dhb_last_three_weeks_ntdun_call_in_duration,
dhb_last_three_weeks_ntdun_call_in_times,
dhb_last_three_weeks_ntdun_call_out_duration,
dhb_last_three_weeks_ntdun_call_out_times,
dhb_last_three_weeks_ntdun_call_tel_total_nums,
dhb_last_three_weeks_ntdun_call_total_duration,
dhb_last_three_weeks_ntdun_call_total_times,
dhb_last_two_weeks_dun_call_avg_duration,
dhb_last_two_weeks_dun_call_duration_above60,
dhb_last_two_weeks_dun_call_duration_below15,
dhb_last_two_weeks_dun_call_duration_between15_and_30,
dhb_last_two_weeks_dun_call_in_duration,
dhb_last_two_weeks_dun_call_in_times,
dhb_last_two_weeks_dun_call_out_duration,
dhb_last_two_weeks_dun_call_out_times,
dhb_last_two_weeks_dun_call_tel_total_nums,
dhb_last_two_weeks_dun_call_total_duration,
dhb_last_two_weeks_dun_call_total_times,
dhb_last_two_weeks_ntdun_call_avg_duration,
dhb_last_two_weeks_ntdun_call_duration_above60,
dhb_last_two_weeks_ntdun_call_duration_below15,
dhb_last_two_weeks_ntdun_call_duration_between15_and_30,
dhb_last_two_weeks_ntdun_call_in_duration,
dhb_last_two_weeks_ntdun_call_in_times,
dhb_last_two_weeks_ntdun_call_out_duration,
dhb_last_two_weeks_ntdun_call_out_times,
dhb_last_two_weeks_ntdun_call_tel_total_nums,
dhb_last_two_weeks_ntdun_call_total_duration,
dhb_last_two_weeks_ntdun_call_total_times,
dhb_last_week_dun_call_avg_duration,
dhb_last_week_dun_call_duration_above60,
dhb_last_week_dun_call_duration_below15,
dhb_last_week_dun_call_duration_between15_and_30,
dhb_last_week_dun_call_in_duration, dhb_last_week_dun_call_in_times,
dhb_last_week_dun_call_out_duration,
dhb_last_week_dun_call_out_times,
dhb_last_week_dun_call_tel_total_nums,
dhb_last_week_dun_call_total_duration,
dhb_last_week_dun_call_total_times,
dhb_last_week_ntdun_call_avg_duration,
dhb_last_week_ntdun_call_duration_above60,
dhb_last_week_ntdun_call_duration_below15,
dhb_last_week_ntdun_call_duration_between15_and_30,
dhb_last_week_ntdun_call_in_duration,
dhb_last_week_ntdun_call_in_times,
dhb_last_week_ntdun_call_out_duration,
dhb_last_week_ntdun_call_out_times,
dhb_last_week_ntdun_call_tel_total_nums,
dhb_last_week_ntdun_call_total_duration,
dhb_last_week_ntdun_call_total_times,
dhb_overview_dun_call_avg_duration,
dhb_overview_dun_call_duration_above60,
dhb_overview_dun_call_duration_below15,
dhb_overview_dun_call_duration_between15_and_30,
dhb_overview_dun_call_in_duration, dhb_overview_dun_call_in_times,
dhb_overview_dun_call_out_duration, dhb_overview_dun_call_out_times,
dhb_overview_dun_call_tel_total_nums,
dhb_overview_dun_call_total_duration,
dhb_overview_dun_call_total_times, dhb_overview_dun_first_call_time,
dhb_overview_dun_last_call_time,
dhb_overview_ntdun_call_avg_duration,
dhb_overview_ntdun_call_duration_above60,
dhb_overview_ntdun_call_duration_below15,
dhb_overview_ntdun_call_duration_between15_and_30,
dhb_overview_ntdun_call_in_duration,
dhb_overview_ntdun_call_in_times,
dhb_overview_ntdun_call_out_duration,
dhb_overview_ntdun_call_out_times,
dhb_overview_ntdun_call_tel_total_nums,
dhb_overview_ntdun_call_total_duration,
dhb_overview_ntdun_call_total_times,
dhb_overview_ntdun_first_call_time,
dhb_overview_ntdun_last_call_time,applied_at,applied_from,applied_type,if(passdue_day>15,1,0) as target
from risk_analysis
where applied_at >= '@start_time_period' and applied_at < '@end_time_period'
and transacted = 1
and dhb_flag =1
and datediff(now(),deadline) > 15
'''
def
__init__
(
self
,
overdue_days
=
15
,
features
=
None
,
sql
=
None
,
start_time_period
=
None
,
end_time_period
=
None
):
# sql = '''
#
# '''
start_time_period
=
(
datetime
.
date
.
today
()
-
relativedelta
(
months
=+
7
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
end_time_period
=
(
datetime
.
date
.
today
()
-
relativedelta
(
days
=+
17
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
def
__init__
(
self
,
features
=
None
,
sql
=
None
,
start_time_period
=
None
,
end_time_period
=
None
):
try
:
if
features
!=
None
:
self
.
features
=
features
if
sql
!=
None
:
self
.
sql
=
sql
else
:
sql
=
"select "
+
str
(
features
)
.
strip
(
'['
)
.
strip
(
']'
)
+
''',if(passdue_day>'''
+
str
(
overdue_days
)
+
''',1,0) as target, applied_at, applied_from, applied_type
from risk_analysis
where applied_at >= '@start_time_period' and applied_at < '@end_time_period'
and transacted = 1
and dhb_flag =1
and datediff(now(),deadline) > '''
+
str
(
overdue_days
)
+
'''
'''
if
start_time_period
!=
None
:
self
.
start_time_period
=
start_time_period
# if the para was not Series
if
(
type
(
features
)
!=
pd
.
core
.
series
.
Series
):
self
.
features
=
pd
.
Series
(
features
)
else
:
self
.
start_time_period
=
(
datetime
.
date
.
today
()
-
dateutil
.
relativedelta
(
months
=+
7
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
if
end_time_period
!=
None
:
self
.
end_time_period
=
end_time_period
else
:
self
.
end_time_period
=
(
datetime
.
date
.
today
()
-
dateutil
.
relativedelta
(
days
=+
16
))
.
strftime
(
"
%
Y-
%
m-
%
d 00:00:00"
)
self
.
features
=
features
except
Exception
as
e
:
print
(
'Parameters Error:
\n
'
,
e
)
print
(
"'features' parameter type Error, it should be list or Series"
)
raise
if
sql
!=
None
:
self
.
sql
=
sql
if
start_time_period
!=
None
:
self
.
start_time_period
=
start_time_period
if
end_time_period
!=
None
:
self
.
end_time_period
=
end_time_period
def
dhb_features_extract
(
self
):
'''
instructions : extract dhb features from risk_analysis
:param self:
:return: dhb features
'''
value_map
=
{
"近3天"
:
1
,
"近4-5天"
:
2
,
"近6-7天"
:
3
,
"近8-15天"
:
4
,
"近16-30天"
:
5
,
"近31-60天"
:
6
,
"近61-90天"
:
7
,
"近91-120天"
:
8
,
"近121-150天"
:
9
,
"近151-180天"
:
10
,
"180天前"
:
11
,
"无"
:
0
"近3天"
:
1
,
"近4-5天"
:
2
,
"近6-7天"
:
3
,
"近8-15天"
:
4
,
"近16-30天"
:
5
,
"近31-60天"
:
6
,
"近61-90天"
:
7
,
"近91-120天"
:
8
,
"近121-150天"
:
9
,
"近151-180天"
:
10
,
"180天前"
:
11
,
"无"
:
0
}
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
# use risk_analysis to extract data
sql
=
self
.
sql
.
replace
(
'@start_time_period'
,
self
.
start_time_period
)
.
replace
(
'@end_time_period'
,
self
.
end_time_period
)
#
dhb_loan
=
pd
.
read_sql
(
sql
,
mysqldb
.
engine_risk_analysis
)
# dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
# manipul category datatype which includes sequences
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
=
pd
.
get_dummies
(
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]],
columns
=
[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
])
# limit the upper boundary
dhb_loan
=
pd
.
read_sql
(
self
.
sql
.
replace
(
'@start_time_period'
,
self
.
start_time_period
)
.
replace
(
'@end_time_period'
,
self
.
end_time_period
),
mysqldb
.
engine_risk_analysis
)
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
=
dhb_loan
[[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]]
.
applymap
(
lambda
x
:
value_map
[
x
])
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_60_and_90_days_ntdun_call_avg_duration
>=
42
,
"dhb_last_60_and_90_days_ntdun_call_avg_duration"
]
=
42
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_ntdun_call_duration_above60
>=
25
,
"dhb_overview_ntdun_call_duration_above60"
]
=
25
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_30_and_60_days_ntdun_call_total_duration
>=
800
,
"dhb_last_30_and_60_days_ntdun_call_total_duration"
]
=
800
...
...
@@ -249,14 +252,41 @@ class dhb:
dhb_loan
.
loc
[
dhb_loan
.
dhb_overview_dun_call_tel_total_nums
>=
22
,
"dhb_overview_dun_call_tel_total_nums"
]
=
22
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_30_days_dun_call_total_duration
>=
1100
,
"dhb_last_30_days_dun_call_total_duration"
]
=
1100
dhb_loan
.
loc
[
dhb_loan
.
dhb_last_two_weeks_ntdun_call_in_duration
>=
300
,
"dhb_last_two_weeks_ntdun_call_in_duration"
]
=
300
# dhb_loan.to_csv("./dhb_loan_sample——"+str(datetime.date.today())+".csv")
print
(
datetime
.
time
.
strftime
(
'
%
Y.
%
m.
%
d
%
H:
%
M:
%
S'
,
datetime
.
time
.
localtime
(
datetime
.
time
.
time
()))
+
"提取了dhb {}+ "
.
format
(
str
(
self
.
overdue_days
))
+
self
.
start_time_period
+
"to"
+
self
.
end_time_period
+
"时段样本"
)
dhb_loan
.
to_csv
(
"./dhb_loan_sample——"
+
str
(
datetime
.
date
.
today
())
+
".csv"
)
print
(
time
.
strftime
(
'
%
Y.
%
m.
%
d
%
H:
%
M:
%
S'
,
time
.
localtime
(
time
.
time
()))
+
"提取了dhb "
+
self
.
start_time_period
+
"to"
+
self
.
end_time_period
+
"时段样本"
)
return
dhb_loan
def
dhb_comparasion
(
self
,
limit
=
"{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
,
df
=
None
,
applied_type
=
None
,
applied_from
=
None
):
df_mongo
=
mongodb
.
pymongodb
(
self
.
start_time_period
,
self
.
end_time_period
,
limit
,
"{'order_id':1,'model_exec_data_source#dhb':1}"
)
'''
instructions : build a comparasion
Params :
df - test dataset which was given
score - score column
target - label
start_time_period -
end_time_period -
applied_tpye -
applied_from -
Returns :
auc comparasion
liftchart plot
'''
# def dhb_comparasion(df,score = 'model_exec_data_source#dhb' ,target = 'target', start_time_period = self.start_time_period, end_time_period = self.end_time_period, applied_type = None, applied_from = None):
# df_mongo = mongodb.pymongodb(start_time_period, end_time_period, limit, "{'order_id':1,'model_exec_data_source#dhb':1}")
# df = pd.merge(df,df_mongo,how='left',left_on='order_no',right_on='order_id')
# df['bins'] = df.qcut(df['target'], q = 10, percision = 6, dupulicates='drop')
# df.groupby
# return 1
mvp/lgbreport.py
View file @
45721de0
...
...
@@ -6,7 +6,6 @@ from data.analyis import datacal
from
models
import
xgboost
from
matplotlib
import
pyplot
as
plt
from
data.graph
import
drawplot
import
dhb
from
mvp
import
dhb
from
data.datasource
import
mysqldb
,
mongodb
dhb
=
dhb
()
df_dhb
=
dhb
.
dhb_features_extract
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment