Commit e511a80c authored by 王家华

修改了model refit和画图的部分代码

parent 38d4951f
from pyplotz.pyplotz import PyplotZ from pyplotz.pyplotz import PyplotZ
from pyplotz.pyplotz import plt from pyplotz.pyplotz import plt
from data.analyis import datacal from tools import datacal
import seaborn as sns import seaborn as sns
import pandas as pd import pandas as pd
......
""" """
Created on Thu Apr 18 11:32:06 2019 Created on Thu Apr 18 11:32:06 2019
@author: wangjiahua @author: Jason Wang
""" """
...@@ -10,54 +10,83 @@ import numpy as np ...@@ -10,54 +10,83 @@ import numpy as np
import pandas as pd import pandas as pd
import seaborn as sns import seaborn as sns
############# plot config ###############
plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['savefig.dpi'] = 226 #图片像素 plt.rcParams['savefig.dpi'] = 226 #图片像素
plt.rcParams['figure.dpi'] = 200 #分辨率 plt.rcParams['figure.dpi'] = 200 #分辨率
def plot_table(dataset, auc, title='untitled', X_label=None, y_label=None, plot_tab=True, legend_list=None, def topN_feature_importance(model, clf, title="untitled", save_path = './plots/', topN=20):
saved_path=None):
''' '''
instructions : visualization of pivot plot feature importance squence
params:
classifier
''' '''
plt.rcParams['font.sans-serif'] = ['SimHei'] plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['savefig.dpi'] = 226 # 图片像素 plt.rcParams['savefig.dpi'] = 226 # 图片像素
plt.rcParams['figure.dpi'] = 200 # 分辨率 plt.rcParams['figure.dpi'] = 200 # 分辨率
fig, axs = plt.subplots(1, 1, figsize=(16, 9), linewidth=0.1) plt.figure(figsize=(10, 6))
model.plot_importance(clf, max_num_features = topN)
plt.title("Feature Importances")
path = save_path + title + "featureImportance.png"
plt.savefig(path)
plt.show()
return path
def plot_table(dataset, auc, title='untitled', X_label=None, y_label=None,
tab_df=None, plot_tab=True, saved_path=None):
'''
instructions : visualization of pivot
Params :
dataset -
auc - auc list / array
title - title of plot('untitled' as default)
x_label - X axis label of plot
y_label - y axis label of plot
plot_tab - plot table or not , default as True
saved_path - saved path, set as None as there has no download needs
'''
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['savefig.dpi'] = 226 # 图片像素
plt.rcParams['figure.dpi'] = 100 # 分辨率
fig, axs = plt.subplots(1, 1, figsize=(6, 6), linewidth=0.1)
table_rows = dataset.columns table_rows = dataset.columns
table_cols = dataset.index table_cols = dataset.index
# traverse each columns of dataframe # traverse each columns of dataframe
for i in table_rows: for i in table_rows:
x = table_cols x = table_cols
y = dataset[i] y = dataset[i]
axs.plot(x, y, maker='o', label=str(i) + ' AUC: ' + auc[i]) axs.plot(x, y, label=str(i) + ' AUC: ' + str(auc[i]))
if plot_tab != False: # if table should be plot
the_table = plt.table(cellText=[list(dataset.iloc[i, :].values) for i in range(len(dataset.head()))], if plot_tab:
the_table = plt.table(cellText=[list(dataset.iloc[i, :].values) for i in range(len(dataset))],
rowLabels=table_rows, rowLabels=table_rows,
colLabels=table_cols, colLabels=table_cols,
colWidths=[0.91 / (len(table_cols) - 1)] * len(table_cols), colWidths=[0.91 / (len(table_cols) - 1)] * len(table_cols),
loc='bottom') loc='bottom')
plt.xticks([]) plt.xticks([])
# otherwise, nothing to do here
the_table.auto_set_font_size(False) the_table.auto_set_font_size(False)
the_table.set_fontsize(8) the_table.set_fontsize(6)
fig.subplots_adjust(bottom=0.2) fig.subplots_adjust(bottom=0.2)
plt.grid() plt.grid()
plt.ylabel(title) if y_label is not None:
plt.ylabel(y_label)
if X_label is not None:
plt.xlabel(X_label)
plt.legend() plt.legend()
# plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--') # plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--')
plt.title(title) plt.title(title)
plt.show() plt.show()
return 1 return 1
def plot_curve_singleCurve(dataset, x_label = None, y_label = None,table_tab = None, def plot_curve_singleCurve(dataset, x_label = None, y_label = None,table_tab = None,
save_path = None, figure_arrangement = 11, fig_size = (4,3), save_path = None, figure_arrangement = 11, fig_size = (4,3),
fig_title='General Plot', fig_name = 'untitled', fig_title='General Plot', fig_name = 'untitled',
...@@ -144,8 +173,6 @@ def density_chart(dataset,title): ...@@ -144,8 +173,6 @@ def density_chart(dataset,title):
plt.show() plt.show()
# #
# alpha = 0.98 / 4 * fig_ith + 0.01 # alpha = 0.98 / 4 * fig_ith + 0.01
# ax.set_title('%.3f' % alpha) # ax.set_title('%.3f' % alpha)
......
def topN_feature_importance(classifier, clf, topN=20, model=lgb):
    '''
    instructions : draw the feature importance chart of a model and show it
    Params :
        classifier - object exposing plot_importance(clf, max_num_features=...)
        clf - forwarded unchanged to classifier.plot_importance
        topN - forwarded as max_num_features
        model - not used inside this function
    '''
    plot_settings = {
        'font.sans-serif': ['SimHei'],
        'axes.unicode_minus': False,
        'savefig.dpi': 226,  # pixel density of saved images
        'figure.dpi': 200,  # display resolution
    }
    # assign key by key so every option goes through the normal rcParams setter
    for option, setting in plot_settings.items():
        plt.rcParams[option] = setting

    plt.figure(figsize=(10, 6))
    classifier.plot_importance(clf, max_num_features=topN)
    plt.title("Feature Importances")
    plt.show()
...@@ -6,7 +6,8 @@ import numpy as np ...@@ -6,7 +6,8 @@ import numpy as np
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import os,psutil import os,psutil
from tools import datacal
from graph import matplot
params_lgb = { params_lgb = {
'task': 'train', # 用途 'task': 'train', # 用途
'application': 'binary', # 用于二分类 'application': 'binary', # 用于二分类
...@@ -43,7 +44,8 @@ def returnAUC(clf, training_set, validation_set, features, target='target'): ...@@ -43,7 +44,8 @@ def returnAUC(clf, training_set, validation_set, features, target='target'):
return train_auc, val_auc return train_auc, val_auc
def train_lgbm(params, df_train, df_val, features, adds_on=None, target='target'): def train_lgbm(params, df_train, df_val, features, adds_on=None, target='target',
featureImportance_path = '../mvp/plots/', topN_featureImportance=20, featureImportance_title='lightgbm'):
''' '''
instructions : training lightgbm model with specified params instructions : training lightgbm model with specified params
...@@ -68,6 +70,8 @@ def train_lgbm(params, df_train, df_val, features, adds_on=None, target='target' ...@@ -68,6 +70,8 @@ def train_lgbm(params, df_train, df_val, features, adds_on=None, target='target'
lgbm = lgb.train(params, lgb_train, valid_sets=lgb_val, verbose_eval=False) lgbm = lgb.train(params, lgb_train, valid_sets=lgb_val, verbose_eval=False)
train_auc, val_auc = returnAUC(lgbm, df_train, df_val, features) train_auc, val_auc = returnAUC(lgbm, df_train, df_val, features)
matplot.topN_feature_importance(lgb, lgbm, title=featureImportance_title,
save_path = featureImportance_path, topN=topN_featureImportance)
# auc = roc_auc_score(dev['target'],gbm.predict(dev[features])) # auc = roc_auc_score(dev['target'],gbm.predict(dev[features]))
return train_auc, val_auc, lgbm return train_auc, val_auc, lgbm
...@@ -117,9 +121,8 @@ def lgb_params_tuning(params, features, train, val, target='target', topN=3, cv_ ...@@ -117,9 +121,8 @@ def lgb_params_tuning(params, features, train, val, target='target', topN=3, cv_
# replace the worst parameter with a greater combination # replace the worst parameter with a greater combination
para['max_depth'] = deepth para['max_depth'] = deepth
para['num_leaves'] = leaves para['num_leaves'] = leaves
optimal_para[topn.argmin()] = para optimal_para[topn.argmin()] = para
return optimal_para, lgb_train, lgb_val, topn return optimal_para, topn
# training_curve.append(train_auc) # training_curve.append(train_auc)
...@@ -163,18 +166,11 @@ def lgb_params_tuning(params, features, train, val, target='target', topN=3, cv_ ...@@ -163,18 +166,11 @@ def lgb_params_tuning(params, features, train, val, target='target', topN=3, cv_
# return 1 # return 1
def topN_feature_importance(classifier, clf, topN=20, model=lgb): def predict(lgbm,df_test,features,target='target'):
''' predictions = lgbm.predict(df_test[features])
plot feature importance squence auc = roc_auc_score(predictions,df_test[target])
''' return predictions, auc
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['savefig.dpi'] = 226 # 图片像素
plt.rcParams['figure.dpi'] = 200 # 分辨率
plt.figure(figsize=(10, 6))
classifier.plot_importance(clf, max_num_features=topN)
plt.title("Featurer Importances")
plt.show()
def buildClf(params=params_lgb): def buildClf(params=params_lgb):
...@@ -183,7 +179,7 @@ def buildClf(params=params_lgb): ...@@ -183,7 +179,7 @@ def buildClf(params=params_lgb):
Params : Params :
''' '''
return lgbm.LGBMClassifier(params) return lgb.LGBMClassifier(params)
def automodelfit(clf, param_grid, dftrain, features, resp, kfold=10, scoring='roc_auc'): def automodelfit(clf, param_grid, dftrain, features, resp, kfold=10, scoring='roc_auc'):
......
...@@ -210,8 +210,8 @@ class dhb: ...@@ -210,8 +210,8 @@ class dhb:
and datediff(now(),deadline) > ''' + str(passdue_day) + ''' and datediff(now(),deadline) > ''' + str(passdue_day) + '''
''' '''
def dhb_features_extract(self): def dhb_features_extract(self,df):
try:
value_map = { value_map = {
"近3天": 1, "近3天": 1,
"近4-5天": 2, "近4-5天": 2,
...@@ -270,24 +270,13 @@ class dhb: ...@@ -270,24 +270,13 @@ class dhb:
dhb_loan.to_csv("./dhb_loan_sample——" + str(datetime.date.today()) + ".csv") dhb_loan.to_csv("./dhb_loan_sample——" + str(datetime.date.today()) + ".csv")
print(time.strftime('%Y.%m.%d %H:%M:%S', time.localtime( print(time.strftime('%Y.%m.%d %H:%M:%S', time.localtime(
time.time())) + "提取了dhb " + self.start_time_period + "to" + self.end_time_period + "时段样本") time.time())) + "提取了dhb " + self.start_time_period + "to" + self.end_time_period + "时段样本")
# ignore exceptions such as "colmns doesn't exist"
except Exception as e:
print("data preprocessing ERR ",e)
pass
return dhb_loan return dhb_loan
'''
instructions : build a comparasion
Params :
df - test dataset which was given
score - score column
target - label
start_time_period -
end_time_period -
applied_tpye -
applied_from -
Returns :
auc comparasion
liftchart plot
'''
def dhb_predict_with_pkl(self,test,pkl='./dhb_cuishou_jianzhi_v3.pkl',features=features): def dhb_predict_with_pkl(self,test,pkl='./dhb_cuishou_jianzhi_v3.pkl',features=features):
open_file = open(pkl, "rb") open_file = open(pkl, "rb")
...@@ -327,7 +316,7 @@ class dhb: ...@@ -327,7 +316,7 @@ class dhb:
def dhb_comparasion(df, score_BM='model_exec_data_source#dhb', score_predict='predict', target='target',applied_type=None, applied_from=None): def dhb_comparasion(df, score_BM='model_exec_data_source#dhb', score_predict='predict', target='target',applied_type=None, applied_from=None):
''' '''
instructions : comparasion of previous dhb liftchart & auc instructions : obtain online dhb score from mongodb
''' '''
# spliting data with appliedType & applied_channel # spliting data with appliedType & applied_channel
df = df[df.applied_type == applied_type] df = df[df.applied_type == applied_type]
...@@ -337,15 +326,6 @@ class dhb: ...@@ -337,15 +326,6 @@ class dhb:
df['bins_BM'] = df.qcut(df[score_BM], q=10, percision=6, dupulicates='drop') df['bins_BM'] = df.qcut(df[score_BM], q=10, percision=6, dupulicates='drop')
## bins of predictions ## bins of predictions
df['bins_predict'] = df.qcut(df[score_predict], q=10, percision=6, dupulicates='drop') df['bins_predict'] = df.qcut(df[score_predict], q=10, percision=6, dupulicates='drop')
pivot_BM = df[['bins_BM', target]].groupby('bins_BM')
pivot_predict = df[['bins_predict', target]].groupby('bins_predict')
# output liftchart & AUC
pivot_BM = pivot_BM.sum() / pivot_BM.count()
pivot_predict = pivot_predict.sum() / pivot_predict.count()
# concate two pivot
pivot = pd.concat([pivot_BM, pivot_predict],axis = 1)
# pivottable plot
pivot.plot()
return 1 return 1
......
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import datetime import datetime
from mvp import xgbreport from tools import datacal
from mvp import lgbreport
from data.analyis import datacal
from models import xgboost from mvp import refit
from models import lightgbm from mvp import rebuild
from models_obj import dhb_obj
###### global variable ######
# label
target = 'target'
#############################
from mvp import dhb
# from mvp import dhb
from data.samples import dhb,sample
dhb = dhb.dhb()
dhb = dhb_obj.dhb()
df_sample = dhb.dhb_features_extract() df_sample = dhb.dhb_features_extract()
target = 'target'
features = dhb.features features = dhb.features
df_sample[features] = df_sample[features].astype(float) df_sample[features] = df_sample[features].astype(float)
df_sample['target'] = df_sample['target'].astype(int) df_sample['target'] = df_sample['target'].astype(int)
...@@ -27,8 +35,6 @@ print('----no.',len(features),'of samples of dhb----') ...@@ -27,8 +35,6 @@ print('----no.',len(features),'of samples of dhb----')
# to save model performance # to save model performance
if __name__ == '__main__': if __name__ == '__main__':
# data extraction # data extraction
''' ## Old Edition here ''' ## Old Edition here
...@@ -40,10 +46,10 @@ if __name__ == '__main__': ...@@ -40,10 +46,10 @@ if __name__ == '__main__':
# else: # else:
# df_train,df_test = datacal.train_test_split_general(df_sample, val_size=None, test_size=0.25, stratify='target', random_state=7) # df_train,df_test = datacal.train_test_split_general(df_sample, val_size=None, test_size=0.25, stratify='target', random_state=7)
''' '''
df_train, df_val, df_test = train_test_split_general() # 默认取样本方法
df_train, df_val, df_test = datacal.train_test_split_general()
# data manipulation
## TODO
...@@ -69,7 +75,7 @@ if __name__ == '__main__': ...@@ -69,7 +75,7 @@ if __name__ == '__main__':
#lgbreport.report(df_train, df_test, df_val, features, target,'','dhb模型迭代报告.doc', kfold = 2) #lgbreport.report(df_train, df_test, df_val, features, target,'','dhb模型迭代报告.doc', kfold = 2)
# merge as single dataframe full of models # merge as single dataframe full of models
pd.DataFrame(xgb_model) #pd.DataFrame(xgb_model)
...@@ -109,4 +115,63 @@ if __name__ == '__main__': ...@@ -109,4 +115,63 @@ if __name__ == '__main__':
# test_min_date=dftest.applied_at.min(),test_max_date=dftest.applied_at.max(),test_cnt=dftest.shape[0]) # test_min_date=dftest.applied_at.min(),test_max_date=dftest.applied_at.max(),test_cnt=dftest.shape[0])
#== xgboost gbtree #== xgboost gbtree
xgbreport.report(dftrain,dftest,dhb.get_feature(),'label','','xgboost_%s.doc' % datetime.datetime.now().date().strftime('%y%m%d'),kfold=2) xgbreport.report(dftrain,dftest,dhb.get_feature(),'label','','xgboost_%s.doc' % datetime.datetime.now().date().strftime('%y%m%d'),kfold=2)
#################################################### report settings #############################################################################
applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'}
applied_type = {'1,2':'首贷','1,2,3':'首付贷','1':'首申','2':'复申','3':'复贷'}
# refit / rebuild sequence
# 生成电话帮对象(使用默认参数)
dhb = dhb_obj.dhb(features=None, sql=None, start_time_period=None, end_time_period=None,passdue_day=15)
# 提取样本
df_sample = dhb.dhb_features_extract()
# 备份df_sample
df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx")
# 电话帮数据处理
# report sequence
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import datetime import datetime
from data.analyis import filetool from tools import filetool
from data.analyis import datacal from tools import datacal
from models import lightgbm from models_kit import lightgbm
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from data.graph import matplot from graph import matplot
from models_obj import dhb_obj
from tools import datacal
import datetime
from models_kit import lightgbm
from models_kit import xgboost
import lightgbm as lgb
from graph import matplot
from tools import filetool
# Refit-and-report driver: load a sample set, split it, tune and train a
# lightGBM model, score the test split, then assemble a Word report.
dhb = dhb_obj.dhb(features=None, sql=None, start_time_period=None, end_time_period=None,passdue_day=15)
# extract samples
#df_sample = dhb.dhb_features_extract()
######### temp #############
# temporary shortcut: read a previously exported sample file from a fixed local path
import pandas as pd
df_sample = pd.read_csv('E:\\model\\model_mvp\\mvp\\dhb_loan_sample——2019-04-23.csv',engine='python')
############################
# back up df_sample
# NOTE(review): written with to_csv, yet the file name ends in ".xlsx" - confirm the intended format
df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx")
# default sample split
df_train, df_val, df_test = datacal.train_test_split_general(df_sample, val_size=0.2, test_size=0.2, stratify='target',
                                                             random_state=7,split_methods='random',
                                                             time_label='applied_at')
del df_sample
# use cross-validation to obtain the best parameters (optimal_para) and topn,
# the list of best AUCs those parameters reached on the CV validation sets
optimal_para,topn = lightgbm.lgb_params_tuning(lightgbm.params_lgb, dhb.features, df_train, df_val, target='target',
                                               topN=3, cv_fold=5)
print('topn 通过train交叉验证得到的auc ',topn)
# train the model with the new parameters (optimal_para); adds_on is the dict
# of parameters to override; also outputs the feature importance
train_auc, val_auc, lgbm = lightgbm.train_lgbm(lightgbm.params_lgb, df_train, df_val, dhb.features,
                                               adds_on=optimal_para, target='target')
predictions ,test_auc = lightgbm.predict(lgbm,df_test,features=dhb.features)
# keep the predictions next to the test rows for the charts below
df_test['predict'] = predictions
####### allocator cache ############
# lookup tables from code strings to display names; not referenced again in this script
applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'}
applied_type = {'1,2':'首贷','1,2,3':'首付贷','1':'首申','2':'复申','3':'复贷'}
####################################
### report
# plot feature importance
path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./plots/', topN=20)
# report file
report_path = "E:\\bla\\"
report_name = "lgb_report.docx"
document = filetool.buildDocument(report_path, report_name)
document.add_heading('lightGBM 算法refit报告')
# NOTE(review): the next calls go through filetool.Document / filetool rather than
# the `document` object created above - confirm they actually write into the report
filetool.Document.add_paragraph('特征权重图')
filetool.add_picture(path)
filetool.Document.add_paragraph('univar_chart')
# one univar chart per feature
# NOTE(review): `i` only appears in the title and picture name, not in the cal_univar call - confirm
for i in dhb.features:
    univar = datacal.cal_univar(df_train,score='raw_score')
    univarChart = matplot.plot_table(univar,title= i +' univar Chart',saved_path='./plots/cache')
    # NOTE(review): "./plots/cache" is joined to the name with no separator or extension - confirm
    filetool.add_picture("./plots/cache" + i +' univar Chart')
# one PDP chart per feature (same pattern: `i` is only used for naming)
for i in dhb.features:
    pdp = datacal.cal_pdp(df_test,score='predict')
    pdpChart = matplot.plot_table(pdp,title= i +' PDP Chart',saved_path='./plots/cache')
    filetool.add_picture("./plots/cache" + i + ' PDP Chart')
# one lift chart per feature (same pattern: `i` is only used for naming)
for i in dhb.features:
    lift = datacal.cal_liftchart(df_test,score='predict')
    liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./plots/cache')
    filetool.add_picture("./plots/cache" + i + ' lift Chart')
# write the assembled report to report_path / report_name
filetool.saveDocument(document, report_path, report_name)
...@@ -4,6 +4,31 @@ import datetime ...@@ -4,6 +4,31 @@ import datetime
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
def liftchart(df, target='target', qcut=10, retbins=True, score='predict'):
    '''
    instructions : return an overdue-rate liftchart dataframe: quantile-cut a
                   score column, then pivot the label inside every bin
    Params :
        df - dataframe holding the score and label columns
             (original author's note: it must be the disbursed-loan set)
        target - label column
        qcut - number of quantile bins
        retbins - kept for backward compatibility; it has no effect, the bin
                  intervals are always available as the index of the result
        score - column whose values are quantile-binned ('predict' as default)
    :return:
        liftchart dataframe indexed by bin, with the columns
        (target, 'mean') and (target, 'count')
    :raises KeyError: when score or target is not a column of df
    '''
    missing = [col for col in (score, target) if col not in df.columns]
    if missing:
        raise KeyError('columns not found in df: %s' % missing)

    # work on a copy so the caller's dataframe does not gain a 'bins' column
    df = df.copy()
    # pd.qcut needs 1-D input, so bin the score column rather than the whole frame
    df['bins'] = pd.qcut(df[score], q=qcut, precision=6, retbins=False, duplicates='drop')
    # observed=False keeps every bin category in the result regardless of pandas version
    pivot = df[['bins', target]].groupby('bins', observed=False).agg(['mean', 'count'])
    return pivot
def train_test_split_general(dataset, val_size=0.2, test_size=0.2, stratify='target', random_state=7, def train_test_split_general(dataset, val_size=0.2, test_size=0.2, stratify='target', random_state=7,
split_methods='random', time_label='applied_at'): split_methods='random', time_label='applied_at'):
''' '''
...@@ -92,6 +117,20 @@ def cal_month(df,date_name,date_name_new): ...@@ -92,6 +117,20 @@ def cal_month(df,date_name,date_name_new):
return df return df
def cal_feature_grid(df,feature,bin=10,method=2): def cal_feature_grid(df,feature,bin=10,method=2):
''' '''
定义 N分位切割区间,负数单独一个区间,非负数N 切割 定义 N分位切割区间,负数单独一个区间,非负数N 切割
...@@ -156,7 +195,7 @@ def cal_univar(df,feature,target,bin=10,classes=[]): ...@@ -156,7 +195,7 @@ def cal_univar(df,feature,target,bin=10,classes=[]):
:return: :return:
''' '''
if df.shape[0]==0: if df.shape[0]==0:
raise('no date') raise('no data')
columns=df.columns.tolist() columns=df.columns.tolist()
if target not in columns: if target not in columns:
raise('not found %s' % target) raise('not found %s' % target)
...@@ -167,9 +206,9 @@ def cal_univar(df,feature,target,bin=10,classes=[]): ...@@ -167,9 +206,9 @@ def cal_univar(df,feature,target,bin=10,classes=[]):
tmp[feature].fillna(-1, inplace=True) tmp[feature].fillna(-1, inplace=True)
# == bin 划分,feature 有可能 非数字 # == bin 划分,feature 有可能 非数字
try: try:
tmp[feature]=tmp[feature].astype(float) tmp[feature] = tmp[feature].astype(float)
feature_grid = cal_feature_grid(tmp,feature,bin) feature_grid = cal_feature_grid(tmp, feature, bin)
tmp['lbl'] = pd.cut(tmp[feature], feature_grid, include_lowest=True) tmp['lbl'] = pd.cut(tmp[feature], feature_grid, include_lowest = True)
tmp['grid'] = tmp['lbl'].cat.codes tmp['grid'] = tmp['lbl'].cat.codes
except ValueError: except ValueError:
tmp['lbl']=tmp[feature] tmp['lbl']=tmp[feature]
...@@ -181,7 +220,7 @@ def cal_univar(df,feature,target,bin=10,classes=[]): ...@@ -181,7 +220,7 @@ def cal_univar(df,feature,target,bin=10,classes=[]):
df_out=df_gp df_out=df_gp
else: else:
df_all = tmp.groupby(['grid','lbl']).agg({target: ['count', 'mean','sum']}).reset_index() df_all = tmp.groupby(['grid','lbl']).agg({target: ['count', 'mean','sum']}).reset_index()
df_all.columns = ['grid','lbl', 'count', 'mean','sum'] df_all.columns = ['grid', 'lbl', 'count', 'mean', 'sum']
df_out = df_all df_out = df_all
return df_out return df_out
......
...@@ -3,6 +3,12 @@ from docx import Document ...@@ -3,6 +3,12 @@ from docx import Document
from docx.shared import Inches from docx.shared import Inches
def buildDocument(path,filename): def buildDocument(path,filename):
'''
instrucions : build a document writer
:param path:
:param filename:
:return:
'''
if filename[-3:]!='doc': if filename[-3:]!='doc':
if filename[-4:] !='docx': if filename[-4:] !='docx':
raise ValueError('{} is not a word file'.format(filename)) raise ValueError('{} is not a word file'.format(filename))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment