Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
a1429476
Commit
a1429476
authored
Apr 22, 2019
by
linfang.wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
电话邦跑
parent
03588f52
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
18 additions
and
10 deletions
+18
-10
xgboost.py
models/xgboost.py
+2
-0
allocator.py
mvp/allocator.py
+7
-1
xgbreport.py
mvp/xgbreport.py
+9
-9
No files found.
models/xgboost.py
View file @
a1429476
...
...
@@ -77,6 +77,8 @@ def modelfit(clf, dftrain, features, resp,useTrainCV = True, kfold=10, eval_metr
:param eval_metric 同 目标函数 objective 有关,取值https://xgboost.readthedocs.io/en/latest/python/python_api.html#
:return:
'''
if
dftrain
[
features
]
.
shape
[
0
]
==
0
:
raise
(
' NO train data !!!! '
)
if
useTrainCV
:
# kflod = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=7)
xgb_param
=
clf
.
get_xgb_params
()
...
...
mvp/allocator.py
View file @
a1429476
...
...
@@ -49,8 +49,14 @@ if __name__ == '__main__':
# ]
dhb
=
dhb
.
dhb
(
start_time_period
=
'2019-01-19 11:00:00'
,
end_time_period
=
'2019-01-20 12:00:00'
)
df
=
dhb
.
dhb_features_extract
()
print
(
df
.
columns
.
tolist
())
print
(
df
.
target
.
unique
())
label
=
'target'
features
=
dhb
.
get_feature
()
df
[
features
]
=
df
[
features
]
.
astype
(
float
)
df
[
'target'
]
=
df
[
'target'
]
.
astype
(
int
)
print
(
'----feature---'
,
len
(
features
))
# df=pd.read_csv('test.csv')
dftrain
,
dftest
=
datacal
.
split_train_val
(
df
,
trainsplit
=
'timeSeries'
,
trainsplitRatio
=
0.8
,
sort_col
=
'applied_at'
)
xgbreport
.
report
(
dftrain
,
dftest
,
dhb
.
features
,
label
,
''
,
'tmp.doc'
)
xgbreport
.
report
(
dftrain
,
dftest
,
features
,
label
,
''
,
'tmp.doc'
,
kfold
=
2
)
mvp/xgbreport.py
View file @
a1429476
...
...
@@ -7,7 +7,7 @@ from models import xgboost
from
matplotlib
import
pyplot
as
plt
from
data.graph
import
drawplot
def
report
(
dftrain
,
dftest
,
features
,
label
,
path
,
filename
):
def
report
(
dftrain
,
dftest
,
features
,
label
,
path
,
filename
,
kfold
=
10
):
'''
dftrain,dftest 中必然有 字段 applied_at,applied_channel,applied_type
:param dftrain:
...
...
@@ -22,34 +22,34 @@ def report(dftrain,dftest,features,label,path,filename):
document
.
add_heading
(
'xgboost 算法运行报告'
)
clf
=
xgboost
.
buildClf
()
document
.
add_paragraph
(
'初始化参数运行{}'
.
format
(
clf
.
get_xgb_params
()))
clf
=
xgboost
.
modelfit
(
clf
,
dftrain
,
features
,
label
)
clf
=
xgboost
.
modelfit
(
clf
,
dftrain
,
features
,
label
,
kfold
=
kfold
)
document
.
add_paragraph
(
'模型训练集{}'
.
format
(
xgboost
.
auc
(
clf
,
dftrain
,
features
,
label
)))
document
.
add_paragraph
(
'模型测试集{}'
.
format
(
xgboost
.
auc
(
clf
,
dftest
,
features
,
label
)))
document
.
add_heading
(
'调整参数'
)
max_depth
=
[
2
,
3
]
min_child_weight
=
range
(
1
,
4
,
1
)
document
,
clf
=
tun_params
(
document
,
clf
,
dftrain
,
dftest
,
{
'max_depth'
:
max_depth
,
'min_child_weight'
:
min_child_weight
},
features
,
label
)
document
,
clf
=
tun_params
(
document
,
clf
,
dftrain
,
dftest
,
{
'max_depth'
:
max_depth
,
'min_child_weight'
:
min_child_weight
},
features
,
label
,
kfold
=
kfold
)
# gamma
gamma
=
[
i
/
10
for
i
in
range
(
0
,
5
)]
document
,
clf
=
tun_params
(
document
,
clf
,
dftrain
,
dftest
,{
'gamma'
:
gamma
},
features
,
label
)
document
,
clf
=
tun_params
(
document
,
clf
,
dftrain
,
dftest
,{
'gamma'
:
gamma
},
features
,
label
,
kfold
=
kfold
)
# subsample colsample_bytree
subsample
=
[
0.8
,
0.9
,
1
]
colsample_bytree
=
[
0.8
,
0.9
,
1
]
document
,
clf
=
tun_params
(
document
,
clf
,
dftrain
,
dftest
,
{
'subsample'
:
subsample
,
'colsample_bytree'
:
colsample_bytree
},
features
,
label
)
{
'subsample'
:
subsample
,
'colsample_bytree'
:
colsample_bytree
},
features
,
label
,
kfold
=
kfold
)
# reg_alpha
reg_alpha
=
[
0.001
,
0.01
,
0.1
,
1
,
10
]
document
,
clf
=
tun_params
(
document
,
clf
,
dftrain
,
dftest
,
{
'reg_alpha'
:
reg_alpha
},
features
,
label
)
{
'reg_alpha'
:
reg_alpha
},
features
,
label
,
kfold
=
kfold
)
# reg_lambda
reg_lambda
=
[
0.001
,
0.01
,
0.1
,
1
,
10
]
document
,
clf
=
tun_params
(
document
,
clf
,
dftrain
,
dftest
,
{
'reg_lambda'
:
reg_lambda
},
features
,
label
)
{
'reg_lambda'
:
reg_lambda
},
features
,
label
,
kfold
=
kfold
)
#==生成模型最后的报告,各个特征的单变量图,PDP,liftchart
dftrain
=
xgboost
.
predict
(
clf
,
dftrain
,
features
)
...
...
@@ -103,10 +103,10 @@ def report(dftrain,dftest,features,label,path,filename):
def
tun_params
(
document
,
clf
,
dftrain
,
dftest
,
params
,
features
,
label
):
def
tun_params
(
document
,
clf
,
dftrain
,
dftest
,
params
,
features
,
label
,
kfold
=
10
):
for
i
in
dict
(
params
)
.
keys
():
document
.
add_paragraph
(
'调参{},取值{}'
.
format
(
i
,
params
[
i
]))
grid_search
=
xgboost
.
automodelfit
(
clf
,
params
,
dftrain
,
features
,
label
)
grid_search
=
xgboost
.
automodelfit
(
clf
,
params
,
dftrain
,
features
,
label
,
kfold
=
kfold
)
clf
=
grid_search
.
best_estimator_
document
.
add_paragraph
(
'模型训练参数{}'
.
format
(
clf
.
get_xgb_params
()))
#==
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment