Commit 76a74874 authored by linfang.wang's avatar linfang.wang

加入MySQL 连接

parent 04719484
...@@ -56,10 +56,16 @@ def cal_feature_grid(df,feature,bin=10): ...@@ -56,10 +56,16 @@ def cal_feature_grid(df,feature,bin=10):
feature_grid = sorted(set(tmp[tmp[feature] >= 0][feature].quantile(bin_index)) | set([-99999,-0.00001])) feature_grid = sorted(set(tmp[tmp[feature] >= 0][feature].quantile(bin_index)) | set([-99999,-0.00001]))
return feature_grid return feature_grid
def cal_accume(df,feature,target,bin=10):
    '''
    Run cal_univar on ``feature`` and append cumulative statistics to its result.

    :param df: dataframe
    :param feature: feature in df.columns
    :param target: in df.columns; its per-bin 'count' and 'sum' are accumulated
    :param bin: forwarded unchanged to cal_univar
    :return: the cal_univar result with three extra columns:
             'acmCnt' (running total of 'count'), 'acmEvent' (running total of
             'sum') and 'acmEventRate' (acmEvent / acmCnt)
    '''
    df_out = cal_univar(df, feature, target, bin)
    # Running totals follow the row order produced by cal_univar
    # (presumably ascending by grid -- confirm against cal_univar).
    df_out['acmCnt'] = df_out['count'].cumsum()
    df_out['acmEvent'] = df_out['sum'].cumsum()
    df_out['acmEventRate'] = df_out['acmEvent'] / df_out['acmCnt']
    # Hand the enriched frame back; without this the caller receives None
    # and the cumulative columns are lost.
    return df_out
def cal_univar(df,feature,target,bin=10,classes=[]): def cal_univar(df,feature,target,bin=10,classes=[]):
''' '''
groupby(classes) 分组,对feature 进行bin 分位,对各个分位进行 count,mean 计算 groupby(classes) 分组,对feature 进行bin 分位,对各个分位进行 count,mean 计算,累计count,mean
:param df: dataframe :param df: dataframe
:param feature: feature in df.columns :param feature: feature in df.columns
:param target: in df.columns eg: count(target) mean(target) :param target: in df.columns eg: count(target) mean(target)
...@@ -88,12 +94,12 @@ def cal_univar(df,feature,target,bin=10,classes=[]): ...@@ -88,12 +94,12 @@ def cal_univar(df,feature,target,bin=10,classes=[]):
tmp['grid']=tmp[feature] tmp['grid']=tmp[feature]
if len(classes) > 0: if len(classes) > 0:
df_gp = tmp.groupby(classes+['grid','lbl']).agg({target: ['count', 'mean']}).reset_index() df_gp = tmp.groupby(classes+['grid','lbl']).agg({target: ['count', 'mean','sum']}).reset_index()
df_gp.columns = classes+['grid','lbl', 'count', 'mean'] df_gp.columns = classes+['grid','lbl', 'count', 'mean','sum']
df_out=df_gp df_out=df_gp
else: else:
df_all = tmp.groupby(['grid','lbl']).agg({target: ['count', 'mean']}).reset_index() df_all = tmp.groupby(['grid','lbl']).agg({target: ['count', 'mean','sum']}).reset_index()
df_all.columns = ['grid','lbl', 'count', 'mean'] df_all.columns = ['grid','lbl', 'count', 'mean','sum']
df_out = df_all df_out = df_all
return df_out return df_out
......
import os
from docx import Document
from docx.shared import Inches
def buildDocument(path,filename):
    """Open the Word document at ``path/filename``, or start a new empty one.

    :param path: directory that holds the document
    :param filename: file name; must end in ``.doc`` or ``.docx`` (case-insensitive)
    :return: ``Document(full_path)`` when the file already exists, otherwise ``Document()``
    :raises ValueError: if ``filename`` does not carry a Word extension
    """
    # Compare the actual extension. str.rfind(filename, 0, 3) cannot do this:
    # it passes an int as the substring and raises TypeError for every input.
    extension = os.path.splitext(filename)[1].lower()
    if extension not in ('.doc', '.docx'):
        raise ValueError('{} is not a word file'.format(filename))
    # NOTE(review): '.doc' stays accepted to keep the intended contract;
    # confirm that the docx library can actually read legacy .doc files.
    full_path = os.path.join(path, filename)
    if os.path.exists(full_path):
        return Document(full_path)
    return Document()
def saveDocument(document,path,filename):
    """Save ``document`` to ``path/filename`` after validating the extension.

    :param document: object exposing ``save(full_path)``
    :param path: target directory
    :param filename: file name; must end in ``.doc`` or ``.docx`` (case-insensitive)
    :return: whatever ``document.save`` returns
    :raises ValueError: if ``filename`` does not carry a Word extension
    """
    # Compare the actual extension. str.rfind(filename, 0, 3) cannot do this:
    # it passes an int as the substring and raises TypeError for every input.
    extension = os.path.splitext(filename)[1].lower()
    if extension not in ('.doc', '.docx'):
        raise ValueError('{} is not a word file'.format(filename))
    return document.save(os.path.join(path, filename))
def insert_table(document, cols, values):
    """Append a styled table to ``document`` and return the document.

    :param document: object exposing ``add_table(rows=, cols=, style=)``
    :param cols: column names, written into the header row
    :param values: list of rows; each row is indexable by column position
    :return: the same ``document`` that was passed in
    """
    column_count = len(cols)
    table = document.add_table(rows=1, cols=column_count, style='Medium Grid 1 Accent 1')

    # Header row: one cell per column name.
    header = table.rows[0].cells
    for position, title in enumerate(cols):
        header[position].text = title

    # Body rows: each value is stringified before being written to its cell.
    for record in values:
        cells = table.add_row().cells
        for position in range(column_count):
            cells[position].text = str(record[position])

    return document
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment