Commit a1429476 authored by linfang.wang

电话邦跑

parent 03588f52
......@@ -77,6 +77,8 @@ def modelfit(clf, dftrain, features, resp,useTrainCV = True, kfold=10, eval_metr
:param eval_metric 同 目标函数 objective 有关,取值https://xgboost.readthedocs.io/en/latest/python/python_api.html#
:return:
'''
if dftrain[features].shape[0] == 0:
    # Fail fast when the training frame has no rows. A bare string cannot be
    # raised in Python 3 (it produces "TypeError: exceptions must derive from
    # BaseException" and hides the message), so raise a real exception type
    # that carries the original text.
    raise ValueError(' NO train data !!!! ')
if useTrainCV:
# kflod = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=7)
xgb_param = clf.get_xgb_params()
......
......@@ -49,8 +49,14 @@ if __name__ == '__main__':
# ]
dhb = dhb.dhb(start_time_period='2019-01-19 11:00:00',end_time_period='2019-01-20 12:00:00')
df=dhb.dhb_features_extract()
print(df.columns.tolist())
print(df.target.unique())
label='target'
features=dhb.get_feature()
df[features]=df[features].astype(float)
df['target']=df['target'].astype(int)
print('----feature---',len(features))
# df=pd.read_csv('test.csv')
dftrain,dftest=datacal.split_train_val(df,trainsplit='timeSeries',trainsplitRatio=0.8,sort_col='applied_at')
xgbreport.report(dftrain,dftest,dhb.features,label,'','tmp.doc')
xgbreport.report(dftrain,dftest,features,label,'','tmp.doc',kfold=2)
......@@ -7,7 +7,7 @@ from models import xgboost
from matplotlib import pyplot as plt
from data.graph import drawplot
def report(dftrain,dftest,features,label,path,filename):
def report(dftrain,dftest,features,label,path,filename,kfold=10):
'''
dftrain,dftest 中必然有 字段 applied_at,applied_channel,applied_type
:param dftrain:
......@@ -22,34 +22,34 @@ def report(dftrain,dftest,features,label,path,filename):
document.add_heading('xgboost 算法运行报告')
clf=xgboost.buildClf()
document.add_paragraph('初始化参数运行{}'.format(clf.get_xgb_params()))
clf=xgboost.modelfit(clf,dftrain,features,label)
clf=xgboost.modelfit(clf,dftrain,features,label,kfold=kfold)
document.add_paragraph('模型训练集{}'.format(xgboost.auc(clf,dftrain,features,label)))
document.add_paragraph('模型测试集{}'.format(xgboost.auc(clf, dftest, features, label)))
document.add_heading('调整参数')
max_depth=[2,3]
min_child_weight=range(1,4,1)
document, clf = tun_params(document, clf, dftrain, dftest, {'max_depth': max_depth,'min_child_weight':min_child_weight}, features, label)
document, clf = tun_params(document, clf, dftrain, dftest, {'max_depth': max_depth,'min_child_weight':min_child_weight}, features, label,kfold=kfold)
# gamma
gamma=[i/10 for i in range(0,5)]
document,clf=tun_params(document,clf,dftrain,dftest,{'gamma':gamma},features,label)
document,clf=tun_params(document,clf,dftrain,dftest,{'gamma':gamma},features,label,kfold=kfold)
# subsample colsample_bytree
subsample=[0.8,0.9,1]
colsample_bytree=[0.8,0.9,1]
document, clf = tun_params(document, clf, dftrain, dftest,
{'subsample': subsample, 'colsample_bytree': colsample_bytree}, features, label)
{'subsample': subsample, 'colsample_bytree': colsample_bytree}, features, label,kfold=kfold)
# reg_alpha
reg_alpha=[0.001,0.01,0.1,1,10]
document, clf = tun_params(document, clf, dftrain, dftest,
{'reg_alpha': reg_alpha}, features, label)
{'reg_alpha': reg_alpha}, features, label,kfold=kfold)
# reg_lambda
reg_lambda = [0.001, 0.01, 0.1, 1, 10]
document, clf = tun_params(document, clf, dftrain, dftest,
{'reg_lambda': reg_lambda}, features, label)
{'reg_lambda': reg_lambda}, features, label,kfold=kfold)
#==生成模型最后的报告,各个特征的单变量图,PDP,liftchart
dftrain=xgboost.predict(clf,dftrain,features)
......@@ -103,10 +103,10 @@ def report(dftrain,dftest,features,label,path,filename):
def tun_params(document,clf,dftrain,dftest,params,features,label):
def tun_params(document,clf,dftrain,dftest,params,features,label,kfold=10):
for i in dict(params).keys():
document.add_paragraph('调参{},取值{}'.format(i,params[i]))
grid_search = xgboost.automodelfit(clf, params,dftrain, features, label)
grid_search = xgboost.automodelfit(clf, params,dftrain, features, label,kfold=kfold)
clf = grid_search.best_estimator_
document.add_paragraph('模型训练参数{}'.format(clf.get_xgb_params()))
#==
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment