update report name

a132f117 · 王家华 · f1f45079 · a132f117 · a132f117 · a132f117
Commit a132f117 authored Apr 22, 2019 by 王家华
7 changed files
--- a/data/analyis/datacal.py
+++ b/data/analyis/datacal.py
@@ -221,4 +221,5 @@ def cal_miss(df,feature,classes=[]):
        df_out=tmp.groupby('flag')[feature].count().reset_index().rename(columns={feature:'cnt1'})
        df_out['cnt']=tmp.shape[0]
        df_out['match_rate']=np.round(df_out['cnt1']/df_out['cnt'],3)
-    return df_out[headers]
\ No newline at end of file
+    return df_out[headers]
+
--- a/data/datasource/mongodb.py
+++ b/data/datasource/mongodb.py
+import pymongo
+import pandas as pd
+import numpy as np
+
+
+# Filter template: matches documents whose 'wf_created_at' is >= '@start_date' and
+# < '@end_date'. The '@...' tokens are literal placeholder text in this string;
+# presumably they are meant to be replaced with real dates before use - confirm.
+limit = "{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
+# Projection template: keeps 'order_id' plus the field named by the '@key' placeholder.
+query = "{'order_id':1,'@key':1}"
+
+def querymongo(start_time_period, end_time_period, limit, query):
+    '''
+    instructions : query the rc_feature_analysis_timing_v2 collection of mongoDB
+    and return the matching documents as a DataFrame
+
+    Params :
+        start_time_period - value substituted for the '@start_date' placeholder in limit
+                            (skipped when None)
+        end_time_period - value substituted for the '@end_date' placeholder in limit
+                          (skipped when None)
+        limit - string form of the filter dict handed to find()
+        query - string form of the projection dict, i.e. the keys that should be returned
+
+    Returns :
+        pandas.DataFrame with one row per matching document
+    '''
+    # Fill in the date placeholders. A limit string that does not contain them is
+    # left untouched, so callers that substituted the dates themselves still work.
+    if start_time_period is not None:
+        limit = limit.replace('@start_date', str(start_time_period))
+    if end_time_period is not None:
+        limit = limit.replace('@end_date', str(end_time_period))
+
+    # NOTE(review): the credentials are hard-coded in the URI; they should be
+    # loaded from configuration instead of living in source control.
+    myclient = pymongo.MongoClient("mongodb://rc_dp_feature_user:qgrcdpfeature_2019@172.20.1.150:20000/?authSource=rc_dp_feature_pro")
+    try:
+        mycol = myclient["rc_dp_feature_pro"]["rc_feature_analysis_timing_v2"]
+        # NOTE(review): eval() executes arbitrary code - only trusted strings may be
+        # passed as limit/query. '$gte' means greater-or-equal, '$lt' means less-than.
+        cursor = mycol.find(eval(limit), eval(query))
+        # The cursor is lazy: it has to be read completely before the client is
+        # closed, otherwise the documents are fetched from a closed connection.
+        rows = list(cursor)
+    finally:
+        myclient.close()
+    return pd.DataFrame(rows)
--- a/data/graph/matplot.py
+++ b/data/graph/matplot.py
+"""
+Created on Thu Apr 18 11:32:06 2019
+
+@author: wangjiahua
+"""
+
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns 
+
+
+# Global matplotlib defaults applied when this module is imported.
+plt.rcParams.update({
+    'font.sans-serif': ['SimHei'],
+    'axes.unicode_minus': False,
+    'savefig.dpi': 226,  # image pixels
+    'figure.dpi': 200,  # resolution
+})
+
+
+
+def plot_curve_singleCurve(dataset, x_label = None, y_label = None,table_tab = None,
+                           save_path = None, figure_arrangement = 11, fig_size = (4,3),
+                           fig_title='General Plot', fig_name = 'untitled',
+                           fig_path = None):
+    '''
+    instructions : plot every column of dataset as its own curve, one subplot per column
+
+    Params :
+        dataset - DataFrame, the index supplies the x axis and each column one curve
+        x_label - optional x axis label applied to every subplot
+        y_label - optional y axis label applied to every subplot
+        figure_arrangement - two-digit grid code, tens digit = rows, units digit = columns
+        fig_size - figure size handed to plt.figure
+        table_tab, save_path, fig_title, fig_name, fig_path - accepted but currently unused
+
+    Returns :
+        1 (always)
+    '''
+    n_rows, n_cols = divmod(figure_arrangement, 10)
+    per_page = n_rows * n_cols
+    if per_page <= 0:
+        raise ValueError('figure_arrangement must be a two-digit code with non-zero digits')
+
+    # Sort once so that the x values and every curve share the same row order.
+    ordered = dataset.sort_index()
+    x_values = ordered.index.astype(str)
+    col = ordered.columns
+
+    # One figure per page of per_page subplots; the last page may be partly filled.
+    for start in range(0, len(col), per_page):
+        plt.figure(figsize=fig_size)
+        for position, name in enumerate(col[start:start + per_page], start=1):
+            axs = plt.subplot(n_rows, n_cols, position)
+            # Select by column label; .loc[name] would search the row index instead.
+            axs.plot(x_values, ordered[name])
+            axs.set_title(name,fontsize = 7)
+            plt.xticks(fontsize = 5)
+            plt.yticks(fontsize = 5)
+            plt.grid()
+
+            # The two labels are independent of each other.
+            if x_label is not None:
+                axs.set_xlabel(x_label, fontsize = 5)
+            if y_label is not None:
+                axs.set_ylabel(y_label, fontsize = 5)
+        plt.tight_layout()
+        plt.show()
+    return 1
+    
+
+
+
+#fig,axs = plt.subplots(1,1,figsize=(16,9),linewidth=0.1)
+
+
+
+#
+#for fig_ith in range(len(df.columns)):
+#    axs = plt.subplot(figure_arrangement * 10 + 1 + fig_ith)
+#    axs.plot(df.index,df.iloc[fig_ith])
+#    axs.set_title(col[])
+#plt.tight_layout()
+
+def plot_curve_multiCurve(dataset, x_label = None, y_label = None,table_tab = None,
+                           save_path = None, figure_arrangement = 11, fig_size = (4,3),
+                           fig_title='General Plot', fig_name = 'untitled',
+                           fig_path = None):
+    '''
+    instructions : plot every column of dataset as a labelled curve on one shared axes
+
+    Params :
+        dataset - DataFrame, the index supplies the x axis and each column one curve
+        x_label - optional x axis label
+        y_label - optional y axis label
+        fig_size - figure size handed to plt.figure
+        fig_title - title shown above the axes
+        table_tab, save_path, figure_arrangement, fig_name, fig_path - accepted but
+            currently unused
+
+    Returns :
+        1 (always)
+    '''
+    # Sort once so that the x values and every curve share the same row order.
+    ordered = dataset.sort_index()
+    x_values = ordered.index.astype(str)
+
+    plt.figure(figsize=fig_size)
+    axs = plt.subplot(111)
+    for name in ordered.columns:
+        # Select by column label; .loc[name] would search the row index instead.
+        axs.plot(x_values, ordered[name], label=name)
+    # Use the caller-supplied title rather than whichever column was plotted last.
+    axs.set_title(fig_title,fontsize = 7)
+    plt.xticks(fontsize = 5)
+    plt.yticks(fontsize = 5)
+    plt.grid()
+
+    if x_label is not None:
+        axs.set_xlabel(x_label, fontsize = 5)
+    if y_label is not None:
+        axs.set_ylabel(y_label, fontsize = 5)
+    plt.legend()
+    plt.tight_layout()
+    plt.show()
+    return 1
+    
+'''
+
+'''
+def plot_curve_mingle():
+    # Placeholder: no plotting is implemented here yet; always returns 1.
+    return 1
+    
+    
+def density_chart(dataset,title):
+    '''
+    instructions : draw one seaborn kdeplot per column of dataset, then title and
+    show the figure
+
+    Params :
+        dataset - DataFrame whose columns are plotted one by one
+        title - text handed to plt.title
+    '''
+    for column_name in dataset.columns:
+        column_values = dataset[column_name]
+        sns.kdeplot(column_values, label = column_name)
+    plt.title(title)
+    plt.show()
+
+def learning_curve():
+    '''
+    instructions : placeholder for a learning-curve chart, nothing is drawn yet
+
+    Returns :
+        1, matching the other stub chart functions in this module
+    '''
+    # A body is required: a bare def followed by another def is an
+    # IndentationError that prevents the whole module from being imported.
+    return 1
+
+    
+def pdp_chart():
+    # Placeholder: no chart is produced here yet; always returns 1.
+    return 1
+
+
+def uniVarChart():
+    # Placeholder: no chart is produced here yet; always returns 1.
+    return 1
+        
+        
+        
+#        
+#	    alpha = 0.98 / 4 * fig_ith + 0.01
+#	    ax.set_title('%.3f' % alpha)
+#	    t1 = np.arange(0.0, 1.0, 0.01)
+#
+#
+#	    for n in [1, 2, 3, 4]:
+#	        plt.plot(t1, t1 ** n, label="n=%d" % n)
+#	    leg = plt.legend(loc='best', ncol=4, mode="expand", shadow=True)
+#	    leg.get_frame().set_alpha(alpha)
+#
+#
+#	# if this fig should be saved
+#	if fig_path != None:
+#		plt.savefig(fig_path + fig_name +'.png')
+#	
+#
+#
+##	for i in range(figure_arrangement%10):
+##		plt.subplots(,figsize=fig_size,linewidth=0.1)
+#
+#	return 1
\ No newline at end of file
--- a/models/lightgbm.py
+++ b/models/lightgbm.py
+import lightgbm as lgb
+from sklearn.metrics import roc_auc_score
+from sklearn.metrics import confusion_matrix, mean_squared_error
+import numpy 
+import pandas
+
+# Module-level LightGBM parameter dictionary; presumably meant to be passed as the
+# `params` argument of lgb_train - confirm against callers.
+params = {
+    'task': 'train',   # purpose of the run
+    'application':'binary',   # used for binary classification
+    'boosting_type': 'gbdt',  # boosting type
+    'num_boost_round':100,   # number of iterations
+    'learning_rate': 0.01,  # learning rate
+    'metric': {'logloss', 'auc'},  # evaluation metrics
+    'early_stopping_rounds':None,
+#         'objective': 'regression', # objective function
+    'max_depth':4,
+    'num_leaves': 20,   # number of leaf nodes
+    'feature_fraction': 0.9, # fraction of features selected when building a tree
+    'bagging_fraction': 0.8, # fraction of samples drawn when building a tree
+    'bagging_freq': 5,  # k means bagging is performed every k iterations
+    
+    'verbose': 1 # <0 shows fatal only, =0 shows errors (warnings), >0 shows info
+    }
+def lgb_train(params,training_set,features,target):
+    '''
+    instructions : training lightgbm model with specified params
+
+    Parameters :
+        params - LightGBM parameter dict handed to lgb.train
+        training_set - dataset holding both the feature columns and the target
+        features - feature list of dataset
+        target - target column or label list of samples
+
+    Returns :
+        the booster returned by lgb.train
+    '''
+    # Named train_data so that the local does not shadow this function's own name.
+    train_data = lgb.Dataset(training_set[features],training_set[target])
+    # Train and hand the model back; without this the dataset was built and discarded.
+    return lgb.train(params, train_data)
+
+
--- a/mvp/report.py
+++ b/mvp/report.py
--- a/mvp/dhb.py
+++ b/mvp/dhb.py
--- a/mvp/lgbreport.py
+++ b/mvp/lgbreport.py
+import pandas as pd
+import numpy as np
+import datetime
+from data.analyis import filetool
+from data.analyis import datacal
+from models import xgboost
+from matplotlib import pyplot as plt
+from data.graph import drawplot
+import dhb
+
+# NOTE(review): `import dhb` above binds the module itself, and calling a module
+# raises TypeError. Presumably a class defined inside mvp/dhb.py was meant -
+# confirm its name and import it explicitly. This line also rebinds `dhb`.
+dhb = dhb()
+# Runs at import time and keeps the result of dhb_features_extract() at module level.
+df_dhb = dhb.dhb_features_extract()