debug

45721de0 · 王家华 · b5a3f366 · 45721de0 · 45721de0 · 45721de0
Commit 45721de0 authored Apr 22, 2019 by 王家华
8 changed files
--- a/models/__pycache__/__init__.cpython-37.pyc
+++ b/models/__pycache__/__init__.cpython-37.pyc
--- a/models/__pycache__/xgboost.cpython-37.pyc
+++ b/models/__pycache__/xgboost.cpython-37.pyc
--- a/models/lightgbm.py
+++ b/models/lightgbm.py
 import lightgbm as lgb
 from sklearn.metrics import roc_auc_score
+from sklearn.model_selection import GridSearchCV
 from sklearn.metrics import confusion_matrix, mean_squared_error
 import numpy 
 import pandas
@@ -21,8 +22,9 @@ params = {
    
    'verbose': 1 # <0 显示致命的, =0 显示错误 (警告), >0 显示信息
    }
-'''

+
+'''
 instructions : training lightgbm model with specified params

 Parameters : 
@@ -33,5 +35,53 @@ Parameters :
 '''
 def lgb_train(params,training_set,features,target):
    lgb_train = lgb.Dataset(training_set[features],training_set[target])
+    #lgb.train(params,)
+    return 1
+
+
+'''
+instructions : build a lgb classifier
+
+Params : 
    
+'''   
+def buildClf(params):
+    return lgb.LGBMClassifier(params)
+
+'''
+'''
+def automodelfit(clf,param_grid,dftrain,features,resp, kfold=10,scoring='roc_auc'):
+
+    # kflod=StratifiedKFold(n_splits=kfold,shuffle=True,random_state=7)
+    grid_search=GridSearchCV(clf,param_grid,scoring=scoring,n_jobs=2,cv=kfold,verbose=2,iid=True,refit=True)
+    #== 模型训练
+    grid_search.fit(dftrain[features],dftrain[resp])
+    #== 获取最优参数
+    return grid_search
+    
+
+def modelfit(clf, dftrain, features, resp,useTrainCV = True, kfold=10, eval_metric='auc',early_stopping_rounds=20):
+    '''
+    模型训练
+    :type useTrainCV: object
+    :param clf:XGBClassifier
+    :param dftrain:训练集
+    :param features: 特征
+    :param resp:label
+    :param useTrainCV:if True  call cv function,目的是调节参数 n_estimators
+    :param cv_folds: N 折交叉验证
+    :param early_stopping_rounds:添加数loss变化不大这个状态持续的轮数，达到这个数就退出训练过程
+    :param eval_metric 同 目标函数 objective 有关，取值https://xgboost.readthedocs.io/en/latest/python/python_api.html#
+    :return:
+    '''
+    if useTrainCV:
+        # kflod = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=7)
+        xgb_param = clf.get_xgb_params()
+        xgtrain = lgb.DMatrix(dftrain[features].values, label=dftrain[resp].values)
+        cvresult = lgb.cv(xgb_param, xgtrain, num_boost_round=clf.get_params()['n_estimators'], nfold=kfold,
+            metrics=eval_metric, early_stopping_rounds=early_stopping_rounds,verbose_eval=True)
+        clf.set_params(n_estimators=cvresult.shape[0])
+
+    clf.fit(dftrain[features], dftrain[resp],eval_metric=eval_metric)
+    return clf

--- a/mvp/__pycache__/__init__.cpython-37.pyc
+++ b/mvp/__pycache__/__init__.cpython-37.pyc
--- a/mvp/__pycache__/dhb.cpython-37.pyc
+++ b/mvp/__pycache__/dhb.cpython-37.pyc
--- a/mvp/__pycache__/xgbreport.cpython-37.pyc
+++ b/mvp/__pycache__/xgbreport.cpython-37.pyc
--- a/mvp/dhb.py
+++ b/mvp/dhb.py
--- a/mvp/lgbreport.py
+++ b/mvp/lgbreport.py
@@ -6,7 +6,6 @@ from data.analyis import datacal
 from models import xgboost
 from matplotlib import pyplot as plt
 from data.graph import drawplot
-import dhb
+from mvp import dhb
+from data.datasource import mysqldb,mongodb

-dhb = dhb()
-df_dhb = dhb.dhb_features_extract()