Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
76a74874
Commit
76a74874
authored
Apr 17, 2019
by
linfang.wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
加入MySQL 连接
parent
04719484
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
40 additions
and
5 deletions
+40
-5
datacal.py
data/analyis/datacal.py
+11
-5
filetool.py
data/analyis/filetool.py
+29
-0
No files found.
data/analyis/datacal.py
View file @
76a74874
...
@@ -56,10 +56,16 @@ def cal_feature_grid(df,feature,bin=10):
...
@@ -56,10 +56,16 @@ def cal_feature_grid(df,feature,bin=10):
feature_grid
=
sorted
(
set
(
tmp
[
tmp
[
feature
]
>=
0
][
feature
]
.
quantile
(
bin_index
))
|
set
([
-
99999
,
-
0.00001
]))
feature_grid
=
sorted
(
set
(
tmp
[
tmp
[
feature
]
>=
0
][
feature
]
.
quantile
(
bin_index
))
|
set
([
-
99999
,
-
0.00001
]))
return
feature_grid
return
feature_grid
def cal_accume(df, feature, target, bin=10):
    """Compute per-bin statistics of ``target`` plus running (cumulative) totals.

    Delegates the binning and per-bin aggregation to ``cal_univar`` and then
    appends three cumulative columns to its result.

    :param df: input dataframe
    :param feature: column of ``df`` that is binned
    :param target: column of ``df`` that is aggregated
    :param bin: number of bins forwarded to ``cal_univar``
    :return: the frame produced by ``cal_univar`` with the extra columns
        ``acmCnt`` (cumulative ``count``), ``acmEvent`` (cumulative ``sum``)
        and ``acmEventRate`` (``acmEvent / acmCnt``)
    """
    df_out = cal_univar(df, feature, target, bin)
    df_out['acmCnt'] = df_out['count'].cumsum()
    df_out['acmEvent'] = df_out['sum'].cumsum()
    df_out['acmEventRate'] = df_out['acmEvent'] / df_out['acmCnt']
    # Without this return the computed columns were discarded and every
    # caller received None.
    return df_out
def
cal_univar
(
df
,
feature
,
target
,
bin
=
10
,
classes
=
[]):
def
cal_univar
(
df
,
feature
,
target
,
bin
=
10
,
classes
=
[]):
'''
'''
groupby(classes) 分组,对feature 进行bin 分位,对各个分位进行 count,mean 计算
groupby(classes) 分组,对feature 进行bin 分位,对各个分位进行 count,mean 计算
,累计count,mean
:param df: dataframe
:param df: dataframe
:param feature: feature in df.columns
:param feature: feature in df.columns
:param target: in df.columns eg: count(target) mean(target)
:param target: in df.columns eg: count(target) mean(target)
...
@@ -88,12 +94,12 @@ def cal_univar(df,feature,target,bin=10,classes=[]):
...
@@ -88,12 +94,12 @@ def cal_univar(df,feature,target,bin=10,classes=[]):
tmp
[
'grid'
]
=
tmp
[
feature
]
tmp
[
'grid'
]
=
tmp
[
feature
]
if
len
(
classes
)
>
0
:
if
len
(
classes
)
>
0
:
df_gp
=
tmp
.
groupby
(
classes
+
[
'grid'
,
'lbl'
])
.
agg
({
target
:
[
'count'
,
'mean'
]})
.
reset_index
()
df_gp
=
tmp
.
groupby
(
classes
+
[
'grid'
,
'lbl'
])
.
agg
({
target
:
[
'count'
,
'mean'
,
'sum'
]})
.
reset_index
()
df_gp
.
columns
=
classes
+
[
'grid'
,
'lbl'
,
'count'
,
'mean'
]
df_gp
.
columns
=
classes
+
[
'grid'
,
'lbl'
,
'count'
,
'mean'
,
'sum'
]
df_out
=
df_gp
df_out
=
df_gp
else
:
else
:
df_all
=
tmp
.
groupby
([
'grid'
,
'lbl'
])
.
agg
({
target
:
[
'count'
,
'mean'
]})
.
reset_index
()
df_all
=
tmp
.
groupby
([
'grid'
,
'lbl'
])
.
agg
({
target
:
[
'count'
,
'mean'
,
'sum'
]})
.
reset_index
()
df_all
.
columns
=
[
'grid'
,
'lbl'
,
'count'
,
'mean'
]
df_all
.
columns
=
[
'grid'
,
'lbl'
,
'count'
,
'mean'
,
'sum'
]
df_out
=
df_all
df_out
=
df_all
return
df_out
return
df_out
...
...
data/analyis/filetool.py
0 → 100644
View file @
76a74874
import
os
from
docx
import
Document
from
docx.shared
import
Inches
def buildDocument(path, filename):
    """Open the Word document ``path/filename``, or start a new empty one.

    :param path: directory that holds (or will hold) the document
    :param filename: file name; its extension must be ``.doc`` or ``.docx``
        (compared case-insensitively)
    :return: ``Document(path/filename)`` when that file exists, otherwise a
        blank ``Document()``
    :raises ValueError: if ``filename`` does not have a Word extension
    """
    # str.rfind() returns an index and needs a string to search for, so it
    # cannot be used to compare a suffix; inspect the extension directly.
    extension = os.path.splitext(filename)[1].lower()
    if extension not in ('.doc', '.docx'):
        raise ValueError('{} is not a word file'.format(filename))

    full_path = os.path.join(path, filename)
    if os.path.exists(full_path):
        return Document(full_path)
    return Document()
def saveDocument(document, path, filename):
    """Save ``document`` to ``path/filename``.

    :param document: object exposing ``save(target_path)``
    :param path: destination directory
    :param filename: destination file name; its extension must be ``.doc`` or
        ``.docx`` (compared case-insensitively)
    :return: whatever ``document.save`` returns
    :raises ValueError: if ``filename`` does not have a Word extension
    """
    # str.rfind() returns an index and needs a string to search for, so it
    # cannot be used to compare a suffix; inspect the extension directly.
    extension = os.path.splitext(filename)[1].lower()
    if extension not in ('.doc', '.docx'):
        raise ValueError('{} is not a word file'.format(filename))

    return document.save(os.path.join(path, filename))
def insert_table(document, cols, values):
    """Append a table with a header row and one row per entry of ``values``.

    :param document: object exposing ``add_table(rows=, cols=, style=)``
    :param cols: column names written into the header row
    :param values: iterable of rows; each row is indexed from ``0`` to
        ``len(cols) - 1`` and every cell is written as ``str(value)``
    :return: the same ``document`` that was passed in
    """
    n_cols = len(cols)
    table = document.add_table(rows=1, cols=n_cols, style='Medium Grid 1 Accent 1')

    # Header names are coerced to str just like the body cells, so
    # non-string column labels (e.g. integers) are written consistently.
    hdr_cells = table.rows[0].cells
    for idx, name in enumerate(cols):
        hdr_cells[idx].text = str(name)

    for value in values:
        row_cells = table.add_row().cells
        # Index by the header width so a row shorter than the header still
        # fails loudly instead of leaving silent blanks.
        for idx in range(n_cols):
            row_cells[idx].text = str(value[idx])
    return document
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment