Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
b58ac63f
Commit
b58ac63f
authored
Apr 22, 2019
by
linfang.wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
优化plot
parent
f2cef298
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
40 deletions
+32
-40
drawplot.py
data/graph/drawplot.py
+17
-25
xgbreport.py
mvp/xgbreport.py
+15
-15
No files found.
data/graph/drawplot.py
View file @
b58ac63f
...
@@ -27,21 +27,30 @@ def liftchart(df,x,y,classes='',bin=10,title='',xlabel='',ylabel=''):
...
@@ -27,21 +27,30 @@ def liftchart(df,x,y,classes='',bin=10,title='',xlabel='',ylabel=''):
'''
'''
# #== 单个TODO 待输出
# #== 单个TODO 待输出
# df_fig1=pd.pivot_table(df_out, index=classes, columns=['lbl', 'grid'],
plt
.
close
(
'all'
)
# values=['count'], aggfunc=['mean'])
plt
.
cla
()
if
classes
!=
''
:
if
classes
!=
''
:
df_out
=
datacal
.
cal_accume
(
df
,
x
,
y
,
bin
,
classes
=
[
classes
])
df_out
=
datacal
.
cal_accume
(
df
,
x
,
y
,
bin
,
classes
=
[
classes
])
#== 显示样本数量
df_fig
=
pd
.
pivot_table
(
df_out
,
index
=
classes
,
columns
=
[
'lbl'
,
'grid'
],
values
=
[
'count'
],
aggfunc
=
[
'mean'
])
df_fig
=
df_fig
[
'mean'
][
'count'
]
#== 行数
rows
=
df_fig
.
index
.
tolist
()
n_rows
=
len
(
rows
)
# 列数
cols
=
df_fig
.
columns
.
levels
[
0
]
.
categories
.
to_tuples
()
.
tolist
()
n_cols
=
len
(
cols
)
cell_text
=
df_fig
.
values
.
tolist
()
plt
.
subplot
(
2
,
1
,
1
)
plt
.
subplot
(
2
,
1
,
1
)
draw_lineplot
(
df_out
,
'grid'
,
'mean'
,
hue
=
classes
,
title
=
title
,
xlabel
=
xlabel
,
ylabel
=
ylabel
)
draw_lineplot
(
df_out
,
'grid'
,
'mean'
,
hue
=
classes
,
title
=
title
,
xlabel
=
xlabel
,
ylabel
=
ylabel
)
plt
.
subplot
(
2
,
1
,
2
)
plt
.
subplot
(
2
,
1
,
2
)
draw_lineplot
(
df_out
,
'grid'
,
'acmMean'
,
hue
=
classes
,
title
=
title
,
xlabel
=
xlabel
,
ylabel
=
ylabel
)
draw_lineplot
(
df_out
,
'grid'
,
'acmMean'
,
hue
=
classes
,
title
=
title
+
'累计'
,
xlabel
=
xlabel
,
ylabel
=
ylabel
)
else
:
else
:
df_out
=
datacal
.
cal_accume
(
df
,
x
,
y
,
bin
)
df_out
=
datacal
.
cal_accume
(
df
,
x
,
y
,
bin
)
plt
.
subplot
(
2
,
1
,
1
)
plt
.
subplot
(
2
,
1
,
1
)
draw_lineplot
(
df_out
,
'grid'
,
'mean'
,
title
=
title
,
xlabel
=
xlabel
,
ylabel
=
ylabel
)
draw_lineplot
(
df_out
,
'grid'
,
'mean'
,
title
=
title
,
xlabel
=
xlabel
,
ylabel
=
ylabel
)
plt
.
subplot
(
2
,
1
,
2
)
plt
.
subplot
(
2
,
1
,
2
)
draw_lineplot
(
df_out
,
'grid'
,
'acmMean'
,
title
=
title
,
xlabel
=
xlabel
,
ylabel
=
ylabel
)
draw_lineplot
(
df_out
,
'grid'
,
'acmMean'
,
title
=
title
+
'累计'
,
xlabel
=
xlabel
,
ylabel
=
ylabel
)
plt
.
tight_layout
()
plt
.
tight_layout
()
# plt.show()
# plt.show()
return
plt
return
plt
...
@@ -54,7 +63,7 @@ def univarchart(df,x,y,bin=10,classes='',title='',xlabel='',ylabel=''):
...
@@ -54,7 +63,7 @@ def univarchart(df,x,y,bin=10,classes='',title='',xlabel='',ylabel=''):
:param df:
:param df:
:return:
:return:
'''
'''
plt
.
cl
a
(
)
plt
.
cl
ose
(
'all'
)
plt
.
subplot
(
1
,
1
,
1
)
plt
.
subplot
(
1
,
1
,
1
)
if
classes
!=
''
:
if
classes
!=
''
:
df_out
=
datacal
.
cal_univar
(
df
,
x
,
y
,
bin
,
classes
=
[
classes
])
df_out
=
datacal
.
cal_univar
(
df
,
x
,
y
,
bin
,
classes
=
[
classes
])
...
@@ -71,7 +80,7 @@ def pdpchart(df,x,y,bin=10,classes='',title='',xlabel='模型分',ylabel='逾期
...
@@ -71,7 +80,7 @@ def pdpchart(df,x,y,bin=10,classes='',title='',xlabel='模型分',ylabel='逾期
:param df:
:param df:
:return:
:return:
'''
'''
plt
.
cl
a
(
)
plt
.
cl
ose
(
'all'
)
plt
.
subplot
(
1
,
1
,
1
)
plt
.
subplot
(
1
,
1
,
1
)
if
classes
!=
''
:
if
classes
!=
''
:
...
@@ -83,21 +92,6 @@ def pdpchart(df,x,y,bin=10,classes='',title='',xlabel='模型分',ylabel='逾期
...
@@ -83,21 +92,6 @@ def pdpchart(df,x,y,bin=10,classes='',title='',xlabel='模型分',ylabel='逾期
# plt.show()
# plt.show()
return
plt
return
plt
'''
双坐标轴
'''
def
draw_lineplot_doubleaxes
(
df
,
x
,
y1
,
y2
,
y1_hue
=
''
,
y2_hue
=
''
,
title
=
''
):
'''
:param df:
:param x:
:param y1:
:param y2:
:param y1_hue:y1 轴分类
:param y2_hue:y2 轴分类
:param title:
:return:
'''
def
draw_barplot
(
df
,
x
,
y
,
hue
=
''
,
title
=
''
):
def
draw_barplot
(
df
,
x
,
y
,
hue
=
''
,
title
=
''
):
'''
'''
...
@@ -111,9 +105,7 @@ def draw_barplot(df,x,y,hue='',title=''):
...
@@ -111,9 +105,7 @@ def draw_barplot(df,x,y,hue='',title=''):
pltz
=
PyplotZ
()
pltz
=
PyplotZ
()
pltz
.
enable_chinese
()
pltz
.
enable_chinese
()
fig
=
plt
.
figure
()
fig
=
plt
.
figure
()
# ax = fig.add_subplot(1, 1, 1)
plt
.
close
(
'all'
)
plt
.
cla
()
sns
.
set
(
style
=
"whitegrid"
)
sns
.
set
(
style
=
"whitegrid"
)
fig
=
plt
.
figure
(
figsize
=
(
6
,
4
))
fig
=
plt
.
figure
(
figsize
=
(
6
,
4
))
ax
=
fig
.
add_subplot
(
1
,
1
,
1
)
ax
=
fig
.
add_subplot
(
1
,
1
,
1
)
...
...
mvp/xgbreport.py
View file @
b58ac63f
...
@@ -26,10 +26,10 @@ def report(dftrain,dftest,features,label,path,filename):
...
@@ -26,10 +26,10 @@ def report(dftrain,dftest,features,label,path,filename):
document
.
add_paragraph
(
'模型训练集{}'
.
format
(
xgboost
.
auc
(
clf
,
dftrain
,
features
,
label
)))
document
.
add_paragraph
(
'模型训练集{}'
.
format
(
xgboost
.
auc
(
clf
,
dftrain
,
features
,
label
)))
document
.
add_paragraph
(
'模型测试集{}'
.
format
(
xgboost
.
auc
(
clf
,
dftest
,
features
,
label
)))
document
.
add_paragraph
(
'模型测试集{}'
.
format
(
xgboost
.
auc
(
clf
,
dftest
,
features
,
label
)))
document
.
add_heading
(
'调整参数'
)
#
document.add_heading('调整参数')
max_depth
=
[
2
,
3
]
#
max_depth=[2,3]
min_child_weight
=
range
(
1
,
4
,
1
)
#
min_child_weight=range(1,4,1)
document
,
clf
=
tun_params
(
document
,
clf
,
dftrain
,
dftest
,
{
'max_depth'
:
max_depth
,
'min_child_weight'
:
min_child_weight
},
features
,
label
)
#
document, clf = tun_params(document, clf, dftrain, dftest, {'max_depth': max_depth,'min_child_weight':min_child_weight}, features, label)
# # gamma
# # gamma
# gamma=[i/10 for i in range(0,5)]
# gamma=[i/10 for i in range(0,5)]
...
@@ -87,17 +87,17 @@ def report(dftrain,dftest,features,label,path,filename):
...
@@ -87,17 +87,17 @@ def report(dftrain,dftest,features,label,path,filename):
document
.
add_paragraph
(
'测试集分渠道--liftchart'
)
document
.
add_paragraph
(
'测试集分渠道--liftchart'
)
document
.
add_picture
(
'tmp.png'
)
document
.
add_picture
(
'tmp.png'
)
#== 各个特征的 单变量图 和 pdp 图
#
#
== 各个特征的 单变量图 和 pdp 图
for
i
in
featureimp
.
feature
.
tolist
():
#
for i in featureimp.feature.tolist():
drawplot
.
univarchart
(
dftest
,
i
,
label
,
bin
=
10
,
title
=
'单变量
%
s'
%
i
,
#
drawplot.univarchart(dftest, i, label, bin=10, title='单变量%s' % i,
ylabel
=
'逾期率'
)
.
savefig
(
'tmp.png'
)
#
ylabel='逾期率').savefig('tmp.png')
document
.
add_paragraph
(
'单变量
%
s'
%
i
)
#
document.add_paragraph('单变量%s' % i)
document
.
add_picture
(
'tmp.png'
)
#
document.add_picture('tmp.png')
#= pdp
#
#= pdp
drawplot
.
pdpchart
(
dftest
,
i
,
'predict_proba'
,
bin
=
10
,
title
=
'pdp
%
s'
%
i
,
#
drawplot.pdpchart(dftest, i, 'predict_proba', bin=10, title='pdp %s' % i,
ylabel
=
'模型分'
)
.
savefig
(
'tmp.png'
)
#
ylabel='模型分').savefig('tmp.png')
document
.
add_paragraph
(
'pdp
%
s'
%
i
)
#
document.add_paragraph('pdp %s' % i)
document
.
add_picture
(
'tmp.png'
)
#
document.add_picture('tmp.png')
filetool
.
saveDocument
(
document
,
path
,
filename
)
filetool
.
saveDocument
(
document
,
path
,
filename
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment