Commit 45721de0 authored by 王家华

debug

parent b5a3f366
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
import numpy
import pandas
@@ -21,8 +22,9 @@ params = {
    'verbose': 1  # <0: show fatal only, =0: show errors (warnings), >0: show info
}
'''
instructions : training lightgbm model with specified params
Parameters :
@@ -33,5 +35,53 @@ Parameters :
'''
def lgb_train(params, training_set, features, target):
    lgb_train = lgb.Dataset(training_set[features], training_set[target])
    # train a booster on the prepared Dataset and return it
    booster = lgb.train(params, lgb_train)
    return booster
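# Illustrative usage sketch (not part of the original commit): how lgb_train
# could be called on a small synthetic frame; the column names and sample
# size below are invented for demonstration only.
def _demo_lgb_train():
    demo = pandas.DataFrame({'f1': numpy.random.rand(200),
                             'f2': numpy.random.rand(200),
                             'label': numpy.random.randint(0, 2, 200)})
    return lgb_train(params, demo, ['f1', 'f2'], 'label')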
'''
instructions : build a lgb classifier
Params :
    params : dict of keyword arguments passed to lgb.LGBMClassifier
'''
def buildClf(params):
    # unpack the dict so each entry becomes a keyword argument of the classifier
    return lgb.LGBMClassifier(**params)
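# Illustrative sketch (not part of the original commit): buildClf expects a
# dict of LGBMClassifier keyword arguments; the values below are arbitrary.
def _demo_build_clf():
    return buildClf({'num_leaves': 31, 'learning_rate': 0.1, 'n_estimators': 100})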
'''
instructions : tune a classifier with GridSearchCV over param_grid
Params :
    clf : estimator to tune (e.g. the output of buildClf)
    param_grid : dict of candidate parameter lists to search
    dftrain / features / resp : training frame, feature columns and label column
'''
def automodelfit(clf, param_grid, dftrain, features, resp, kfold=10, scoring='roc_auc'):
    # kfold = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=7)
    grid_search = GridSearchCV(clf, param_grid, scoring=scoring, n_jobs=2, cv=kfold, verbose=2, refit=True)
    # fit the grid search on the training data
    grid_search.fit(dftrain[features], dftrain[resp])
    # return the fitted search; the best parameters are in grid_search.best_params_
    return grid_search
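# Illustrative sketch (not part of the original commit): a minimal grid search
# over two LightGBM hyper-parameters; the grid values here are assumptions.
def _demo_automodelfit(dftrain, features, resp):
    clf = buildClf({'n_estimators': 100})
    param_grid = {'num_leaves': [15, 31, 63], 'learning_rate': [0.05, 0.1]}
    search = automodelfit(clf, param_grid, dftrain, features, resp, kfold=5)
    return search.best_params_, search.best_score_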
def modelfit(clf, dftrain, features, resp, useTrainCV=True, kfold=10, eval_metric='auc', early_stopping_rounds=20):
    '''
    Model training
    :param clf: LGBMClassifier (sklearn-style estimator)
    :param dftrain: training set
    :param features: feature columns
    :param resp: label column
    :param useTrainCV: if True, run cross-validation first to tune n_estimators
    :param kfold: number of cross-validation folds
    :param early_stopping_rounds: stop when the eval metric has not improved for this many consecutive rounds
    :param eval_metric: evaluation metric, tied to the objective; see https://xgboost.readthedocs.io/en/latest/python/python_api.html#
    :return: the fitted classifier
    '''
    if useTrainCV:
        # kfold = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=7)
        lgb_params = clf.get_params()
        # n_estimators is passed explicitly as num_boost_round, so drop the alias
        num_rounds = lgb_params.pop('n_estimators')
        lgbtrain = lgb.Dataset(dftrain[features].values, label=dftrain[resp].values)
        cvresult = lgb.cv(lgb_params, lgbtrain, num_boost_round=num_rounds, nfold=kfold,
                          metrics=eval_metric, early_stopping_rounds=early_stopping_rounds,
                          verbose_eval=True)
        # lgb.cv returns a dict of per-round metric lists; their length is the best round count
        clf.set_params(n_estimators=len(cvresult['%s-mean' % eval_metric]))
    clf.fit(dftrain[features], dftrain[resp], eval_metric=eval_metric)
    return clf
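# Illustrative sketch (not part of the original commit): tune n_estimators via
# the built-in cv step, then score the fitted model on the training frame.
def _demo_modelfit(dftrain, features, resp):
    clf = buildClf({'n_estimators': 500, 'learning_rate': 0.05})
    clf = modelfit(clf, dftrain, features, resp, useTrainCV=True, kfold=5)
    scores = clf.predict_proba(dftrain[features])[:, 1]
    return roc_auc_score(dftrain[resp], scores)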
@@ -6,7 +6,6 @@ from data.analyis import datacal
from models import xgboost
from matplotlib import pyplot as plt
from data.graph import drawplot
from mvp import dhb
from data.datasource import mysqldb,mongodb
dhb = dhb()
df_dhb = dhb.dhb_features_extract()