Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
a3823e67
Commit
a3823e67
authored
Apr 28, 2019
by
linfang.wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update 之前的模型
parent
fe8f7148
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
38 additions
and
14 deletions
+38
-14
dbquery.py
data/datasource/dbquery.py
+1
-1
dhb.py
data/samples/dhb.py
+3
-0
allocator.py
mvp/allocator.py
+34
-13
No files found.
data/datasource/dbquery.py
View file @
a3823e67
...
...
@@ -9,7 +9,7 @@ def mysql_query(sql,engine_sql):
'''
res
=
[]
#== palo 每次查询不超过10000
tmp
=
pd
.
read_sql
(
sql
,
engine_sql
,
chunksize
=
500
0
)
tmp
=
pd
.
read_sql
(
sql
,
engine_sql
,
chunksize
=
500
1
)
for
tt
in
tmp
:
res
.
append
(
tt
)
return
pd
.
concat
(
res
)
\ No newline at end of file
data/samples/dhb.py
View file @
a3823e67
...
...
@@ -37,7 +37,10 @@ def query_sample(start_date,end_date,is_loan=True):
'''
%
(
','
.
join
(
features
),
start_date
,
end_date
)
df
=
dbquery
.
mysql_query
(
sql
,
engine_risk_analysis
)
yewu
=
query_byloanid
(
df
.
loan_id
.
tolist
())
df
.
loan_id
=
df
.
loan_id
.
astype
(
int
)
yewu
.
loan_id
=
yewu
.
loan_id
.
astype
(
int
)
df
=
pd
.
merge
(
df
,
yewu
,
on
=
'loan_id'
,
how
=
'inner'
)
df
.
applied_at
=
pd
.
to_datetime
(
df
.
applied_at
)
value_map
=
{
"近3天"
:
1
,
"近4-5天"
:
2
,
...
...
mvp/allocator.py
View file @
a3823e67
...
...
@@ -3,7 +3,8 @@ import numpy as np
import
datetime
from
mvp
import
xgbreport
from
data.analyis
import
datacal
from
mvp
import
dhb
# from mvp import dhb
from
data.samples
import
dhb
,
sample
if
__name__
==
'__main__'
:
# features=[
...
...
@@ -47,16 +48,36 @@ if __name__ == '__main__':
# 'third_data_source#xy_pan_newqueryAorgAcount',
# 'third_data_source#xy_pan_newqueryAsumAcount'
# ]
dhb
=
dhb
.
dhb
(
start_time_period
=
'2019-01-19 11:00:00'
,
end_time_period
=
'2019-01-20 12:00:00'
)
df
=
dhb
.
dhb_features_extract
()
print
(
df
.
columns
.
tolist
())
print
(
df
.
target
.
unique
())
label
=
'target'
features
=
dhb
.
get_feature
()
df
[
features
]
=
df
[
features
]
.
astype
(
float
)
df
[
'target'
]
=
df
[
'target'
]
.
astype
(
int
)
print
(
'----feature---'
,
len
(
features
))
#
dhb = dhb.dhb(start_time_period='2019-01-19 11:00:00',end_time_period='2019-01-20 12:00:00')
#
df=dhb.dhb_features_extract()
#
print(df.columns.tolist())
#
print(df.target.unique())
#
label='target'
#
features=dhb.get_feature()
#
df[features]=df[features].astype(float)
#
df['target']=df['target'].astype(int)
#
print('----feature---',len(features))
# df=pd.read_csv('test.csv')
dftrain
,
dftest
=
datacal
.
split_train_val
(
df
,
trainsplit
=
'timeSeries'
,
trainsplitRatio
=
0.8
,
sort_col
=
'applied_at'
)
xgbreport
.
report
(
dftrain
,
dftest
,
features
,
label
,
''
,
'tmp.doc'
,
kfold
=
2
)
#== 模型名称
model_name
=
'dhb'
#== 目标是15天
passdue_day
=
15
df_log
=
sample
.
get_last_record
(
model_name
)
if
df_log
.
shape
[
0
]
==
1
:
start_date
,
end_date
=
sample
.
cal_sample_date
(
df_log
.
max_date
[
0
],
passdue_day
)
else
:
start_date
,
end_date
=
sample
.
cal_sample_date
(
passdue_day
=
passdue_day
)
start_date
=
'2019-01-01'
end_date
=
'2019-01-10'
print
(
start_date
,
end_date
)
df_sample
=
dhb
.
query_sample
(
start_date
,
end_date
)
df_sample
[
'applied_at'
]
=
pd
.
to_datetime
(
df_sample
[
'applied_at'
])
df_sample
[
'label'
]
=
1
df_sample
.
loc
[
df_sample
.
passdue_day
>=
passdue_day
,
'label'
]
=
0
dftrain
,
dftest
=
datacal
.
split_train_val
(
df_sample
,
trainsplit
=
'timeSeries'
,
trainsplitRatio
=
0.8
,
sort_col
=
'applied_at'
)
# 记录样本信息
# sample.save_model_record(model_name,min_date=df_sample.applied_at.min(),max_date=df_sample.applied_at.max(),sample_cnt=df_sample.shape[0],
# train_min_date=dftrain.applied_at.min(),train_max_date=dftrain.applied_at.max(),train_cnt=dftrain.shape[0],
# test_min_date=dftest.applied_at.min(),test_max_date=dftest.applied_at.max(),test_cnt=dftest.shape[0])
#== xgboost gbtree
xgbreport
.
report
(
dftrain
,
dftest
,
dhb
.
get_feature
(),
'label'
,
''
,
'xgboost_
%
s.doc'
%
datetime
.
datetime
.
now
()
.
date
()
.
strftime
(
'
%
y
%
m
%
d'
),
kfold
=
2
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment