Commit a132f117 authored by 王家华's avatar 王家华

update report name

parent f1f45079
......@@ -221,4 +221,5 @@ def cal_miss(df,feature,classes=[]):
df_out=tmp.groupby('flag')[feature].count().reset_index().rename(columns={feature:'cnt1'})
df_out['cnt']=tmp.shape[0]
df_out['match_rate']=np.round(df_out['cnt1']/df_out['cnt'],3)
return df_out[headers]
\ No newline at end of file
return df_out[headers]
import pymongo
import pandas as pd
import numpy as np
# Filter template: a dict literal stored as a string. '@start_date' and
# '@end_date' are placeholders in the text.
# NOTE(review): presumably callers substitute the placeholders before handing
# the string to querymongo, which eval()s its `limit` argument - confirm.
limit = "{'wf_created_at': {'$gte': '@start_date', '$lt': '@end_date'}}"
# Projection template: a dict literal stored as a string selecting 'order_id'
# plus one more field named by the '@key' placeholder.
# NOTE(review): presumably '@key' is replaced with a real field name by the
# caller - confirm.
query = "{'order_id':1,'@key':1}"
def querymongo(start_time_period, end_time_period, limit, query):
    """Query the feature collection in MongoDB and return the result as a DataFrame.

    Params :
    start_time_period - not used by this function
    end_time_period - not used by this function
    limit - string containing a dict literal; it is evaluated and passed to
            ``find`` as the filter
    query - string containing a dict literal; it is evaluated and passed to
            ``find`` as the projection, i.e. the keys that should be returned

    Returns :
    pandas.DataFrame built from the list of documents returned by ``find``.

    NOTE(review): ``start_time_period`` / ``end_time_period`` are never applied
    here, so any '@start_date' / '@end_date' placeholders left in ``limit`` are
    sent to the server verbatim - presumably the caller substitutes them first;
    confirm.
    """
    # NOTE(review): the connection string embeds a user name and password;
    # consider loading them from configuration or the environment instead.
    myclient = pymongo.MongoClient("mongodb://rc_dp_feature_user:qgrcdpfeature_2019@172.20.1.150:20000/?authSource=rc_dp_feature_pro")
    try:
        mydb = myclient["rc_dp_feature_pro"]
        mycol = mydb["rc_feature_analysis_timing_v2"]
        # SECURITY: eval() executes arbitrary Python. Only pass trusted strings
        # as `limit` / `query`; never forward user-supplied text here.
        cursor = mycol.find(eval(limit), eval(query))
        # find() returns a lazy cursor, so the documents must be fetched
        # while the client is still open.
        records = list(cursor)
    finally:
        # Release the connection even when the query or evaluation fails.
        myclient.close()
    return pd.DataFrame(records)
"""
Created on Thu Apr 18 11:32:06 2019
@author: wangjiahua
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Global matplotlib defaults applied when this module is imported.
plt.rcParams.update({
    # NOTE(review): SimHei is presumably chosen so CJK labels render - confirm.
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
    'savefig.dpi': 226,  # image pixels (saved figures)
    'figure.dpi': 200,  # resolution (on-screen figures)
})
def plot_curve_singleCurve(dataset, x_label=None, y_label=None, table_tab=None,
                           save_path=None, figure_arrangement=11, fig_size=(4, 3),
                           fig_title='General Plot', fig_name='untitled',
                           fig_path=None):
    """Plot one curve per column of ``dataset``, each in its own subplot.

    Params :
    dataset - pandas DataFrame; one subplot is drawn per entry of
              ``dataset.columns``
    x_label - optional x-axis label applied to every subplot
    y_label - optional y-axis label applied to every subplot
    figure_arrangement - two-digit layout code ``rows * 10 + cols``
                         (e.g. 22 means a 2 x 2 grid of subplots per figure)
    fig_size - figure size passed to ``plt.figure``
    table_tab, save_path, fig_title, fig_name, fig_path - accepted for
              interface compatibility; not used by this function

    Returns :
    1 once all figures have been shown.

    Raises :
    ValueError - if ``figure_arrangement`` describes a grid with no cells.

    When there are more columns than grid cells, the columns are split into
    pages and every page is drawn (and shown) in its own figure.
    """
    # Decode the layout explicitly. The earlier expression
    # `a // 10 * a % 10` evaluated as `((a // 10) * a) % 10`.
    n_rows, n_cols = divmod(figure_arrangement, 10)
    per_page = n_rows * n_cols
    if per_page <= 0:
        raise ValueError(
            "figure_arrangement must encode rows*10+cols with at least one "
            "row and one column, got %r" % (figure_arrangement,)
        )

    col = dataset.columns
    index = pd.Series(dataset.index.sort_values()).astype(str)

    # Step through the columns one page at a time; the range covers a final
    # partial page and the slice is capped so positions stay inside the grid.
    for start in range(0, len(col), per_page):
        plt.figure(figsize=fig_size)
        page = col[start:start + per_page]
        for position, name in enumerate(page, start=1):
            axs = plt.subplot(n_rows, n_cols, position)
            # NOTE(review): `.loc[name]` looks the column name up among the
            # row labels; confirm this is intended rather than selecting the
            # column (`dataset.loc[:, name]`).
            axs.plot(index, dataset.loc[name])
            axs.set_title(name, fontsize=7)
            plt.xticks(fontsize=5)
            plt.yticks(fontsize=5)
            # Explicit True: a bare grid() call toggles the current state.
            axs.grid(True)
            if x_label is not None:
                axs.set_xlabel(x_label, fontsize=5)
            if y_label is not None:
                axs.set_ylabel(y_label, fontsize=5)
        plt.tight_layout()
        plt.show()
    return 1
#fig,axs = plt.subplots(1,1,figsize=(16,9),linewidth=0.1)
#
#for fig_ith in range(len(df.columns)):
# axs = plt.subplot(figure_arrangement * 10 + 1 + fig_ith)
# axs.plot(df.index,df.iloc[fig_ith])
# axs.set_title(col[])
#plt.tight_layout()
def plot_curve_multiCurve(dataset, x_label=None, y_label=None, table_tab=None,
                          save_path=None, figure_arrangement=11, fig_size=(4, 3),
                          fig_title='General Plot', fig_name='untitled',
                          fig_path=None):
    """Plot every column of ``dataset`` as a labelled curve on one shared axes.

    Params :
    dataset - pandas DataFrame; one curve is drawn per entry of
              ``dataset.columns`` and labelled with that entry in the legend
    x_label - optional x-axis label
    y_label - optional y-axis label
    fig_size - figure size passed to ``plt.figure``
    fig_title - title shown above the axes
    table_tab, save_path, figure_arrangement, fig_name, fig_path - accepted
              for interface compatibility; not used by this function

    Returns :
    1 once the figure has been shown.
    """
    col = dataset.columns
    index = pd.Series(dataset.index.sort_values()).astype(str)
    plt.figure(figsize=fig_size)
    axs = plt.subplot(111)

    for name in col:
        # NOTE(review): `.loc[name]` looks the column name up among the row
        # labels; confirm this is intended rather than selecting the column
        # (`dataset.loc[:, name]`).
        axs.plot(index, dataset.loc[name], label=name)

    # Axes styling is applied once, after all curves are drawn. Previously the
    # title was overwritten with each column name (ending on the last one) and
    # a bare grid() call inside the loop toggled the grid on and off.
    axs.set_title(fig_title, fontsize=7)
    plt.xticks(fontsize=5)
    plt.yticks(fontsize=5)
    axs.grid(True)
    if x_label is not None:
        axs.set_xlabel(x_label, fontsize=5)
    if y_label is not None:
        axs.set_ylabel(y_label, fontsize=5)

    plt.legend()
    plt.tight_layout()
    plt.show()
    return 1
'''
'''
def plot_curve_mingle():
    """Placeholder with no plotting logic yet; always returns 1."""
    status = 1
    return status
def density_chart(dataset,title):
    """Draw a kernel density curve for each column of ``dataset`` and show it.

    Params :
    dataset - pandas DataFrame; every column is passed to ``sns.kdeplot``
              with the column name as its label
    title - title given to the plot
    """
    column_names = list(dataset.columns)
    for name in column_names:
        series = dataset.loc[:, name]
        sns.kdeplot(series, label=name)
    plt.title(title)
    plt.show()
def learning_curve():
    """Placeholder with no plotting logic yet; always returns 1.

    The definition previously had no body of its own; it now returns 1 like
    the other placeholder functions in this file.
    """
    return 1


def pdp_chart():
    """Placeholder with no plotting logic yet; always returns 1."""
    return 1
def uniVarChart():
    """Placeholder with no plotting logic yet; always returns 1."""
    status = 1
    return status
#
# alpha = 0.98 / 4 * fig_ith + 0.01
# ax.set_title('%.3f' % alpha)
# t1 = np.arange(0.0, 1.0, 0.01)
#
#
# for n in [1, 2, 3, 4]:
# plt.plot(t1, t1 ** n, label="n=%d" % n)
# leg = plt.legend(loc='best', ncol=4, mode="expand", shadow=True)
# leg.get_frame().set_alpha(alpha)
#
#
# # if this fig should be saved
# if fig_path != None:
# plt.savefig(fig_path + fig_name +'.png')
#
#
#
## for i in range(figure_arrangement%10):
## plt.subplots(,figsize=fig_size,linewidth=0.1)
#
# return 1
\ No newline at end of file
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix, mean_squared_error
import numpy
import pandas
# Module-level LightGBM parameter dictionary.
params = {
'task': 'train', # purpose of the run
'application':'binary', # used for binary classification
'boosting_type': 'gbdt', # boosting type
'num_boost_round':100, # number of boosting iterations
'learning_rate': 0.01, # learning rate
'metric': {'logloss', 'auc'}, # evaluation metrics
'early_stopping_rounds':None,
# 'objective': 'regression', # objective function
'max_depth':4,
'num_leaves': 20, # number of leaves
'feature_fraction': 0.9, # fraction of features selected when building a tree
'bagging_fraction': 0.8, # fraction of samples drawn when building a tree
'bagging_freq': 5, # k means bagging is performed every k iterations
'verbose': 1 # <0 shows fatal, =0 shows errors (warnings), >0 shows info
}
'''
instructions : training lightgbm model with specified params
Parameters :
dataset -
features - feature list of dataset
target - target column or label list of samples
'''
def lgb_train(params,training_set,features,target):
lgb_train = lgb.Dataset(training_set[features],training_set[target])
This diff is collapsed.
import pandas as pd
import numpy as np
import datetime
from data.analyis import filetool
from data.analyis import datacal
from models import xgboost
from matplotlib import pyplot as plt
from data.graph import drawplot
import dhb
# NOTE(review): `dhb` is the name bound by `import dhb` above, so this calls
# that imported object and then rebinds the name to the result. If `dhb` is a
# plain module, the call raises TypeError - confirm whether a class inside the
# module was meant to be instantiated here.
dhb = dhb()
# Call dhb_features_extract() on the resulting object and keep its return value.
df_dhb = dhb.dhb_features_extract()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment