Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
model_mvp
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
decision-science
model_mvp
Commits
b980367e
Commit
b980367e
authored
Apr 24, 2019
by
linfang.wang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
优化sample
parent
912b21b8
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
406 additions
and
198 deletions
+406
-198
__init__.py
data/__init__.py
+0
-0
__init__.py
data/samples/__init__.py
+0
-0
bairong.py
data/samples/bairong.py
+6
-0
dhb.py
data/samples/dhb.py
+67
-39
dhb.csv
data/samples/features/dhb.csv
+168
-159
sample.py
data/samples/sample.py
+161
-0
yewudata.py
data/samples/yewudata.py
+4
-0
No files found.
data/__init__.py
0 → 100644
View file @
b980367e
data/
etl
/__init__.py
→
data/
samples
/__init__.py
View file @
b980367e
File moved
data/samples/bairong.py
0 → 100644
View file @
b980367e
'''
目标:查询百融样本数据,百融样本特征
'''
data/samples/dhb.py
View file @
b980367e
import
pandas
as
pd
import
pandas
as
pd
from
data.samples
import
sample
from
data.datasource
import
dbquery
from
data.datasource.mysqldb
import
engine_risk_analysis
from
data.samples.yewudata
import
*
'''
'''
目的:获取电话邦特征,样本数据,数据源为风控分析库
目的:获取电话邦特征,样本数据,数据源为风控分析库
'''
'''
feature_file_name
=
'features/dhb.csv'
feature_file_name
=
'features/dhb.csv'
def
get_features_from_file
():
def
get_feature
():
'''
return
sample
.
get_feature_by_version
(
feature_file_name
)
从feature 文件中读取feature
:return: df,columns=['feature','version']
'''
df_feature
=
pd
.
read_csv
(
feature_file_name
,
sep
=
'
\t
'
)
return
df_feature
def
get_feature_by_version
(
version
=
None
):
def
query_sample
(
start_date
,
end_date
,
is_loan
=
True
):
'''
根据feature 的版本号,获取该版本下的feature,如果不指定,则获取最新的版本号
:param version:int 负数为不指定或者
:return:list
'''
'''
df_feature
=
get_features_from_file
()
默认提取放款集
if
(
version
==
None
)
or
(
version
<
1
):
:param start_date:
version
=
df_feature
.
version
.
max
()
:param end_date:
return
df_feature
[
df_feature
.
version
==
version
]
.
feature
.
tolist
()
:return:样本数据
def
save_features
(
features
):
'''
'''
针对新的feature,同维护的feature文档比较,如果同最新版的特征一样,那么无需保存,如果不一致,则作为新的一版特征进行保存
features
=
get_feature
()
:param features:list
if
is_loan
:
:return:
sql
=
'''
'''
select loan_id,
%
s
f2
=
get_feature_by_version
()
from risk_analysis
if
(
set
(
f2
)
&
set
(
features
))
==
(
set
(
f2
)
|
set
(
features
)):
where dhb_flag =1 and transacted=1 and applied=1
print
(
'features are already newest,not need to save'
)
and applied_at >='
%
s' and applied_at<'
%
s'
return
True
'''
%
(
','
.
join
(
features
),
start_date
,
end_date
)
else
:
else
:
#== 更新特征
sql
=
'''
tmp
=
pd
.
DataFrame
(
features
,
columns
=
[
'feature'
])
select loan_id,
%
s
df_feature
=
get_features_from_file
()
from risk_analysis
version
=
df_feature
.
version
.
max
()
+
1
where dhb_flag =1 and applied=1
tmp
[
'version'
]
=
version
and applied_at >='
%
s' and applied_at<'
%
s'
columns
=
[
'feature'
,
'version'
]
'''
%
(
','
.
join
(
features
),
start_date
,
end_date
)
df_feature
=
pd
.
concat
([
df_feature
[
columns
],
tmp
[
columns
]])
df
=
dbquery
.
mysql_query
(
sql
,
engine_risk_analysis
)
df_feature
[
columns
]
.
to_csv
(
feature_file_name
,
index
=
None
,
encoding
=
'utf8'
)
yewu
=
query_byloanid
(
df
.
loan_id
.
tolist
())
df
=
pd
.
merge
(
df
,
yewu
,
on
=
'loan_id'
,
how
=
'inner'
)
value_map
=
{
"近3天"
:
1
,
"近4-5天"
:
2
,
"近6-7天"
:
3
,
"近8-15天"
:
4
,
"近16-30天"
:
5
,
"近31-60天"
:
6
,
"近61-90天"
:
7
,
"近91-120天"
:
8
,
"近121-150天"
:
9
,
"近151-180天"
:
10
,
"180天前"
:
11
,
"无"
:
0
}
cols
=
[
"dhb_overview_dun_first_call_time"
,
"dhb_overview_dun_last_call_time"
,
"dhb_overview_ntdun_first_call_time"
,
"dhb_overview_ntdun_last_call_time"
]
df
[
cols
]
=
df
[
cols
]
.
applymap
(
lambda
x
:
value_map
[
x
])
df
.
loc
[
df
.
dhb_last_60_and_90_days_ntdun_call_avg_duration
>=
42
,
"dhb_last_60_and_90_days_ntdun_call_avg_duration"
]
=
42
df
.
loc
[
df
.
dhb_overview_ntdun_call_duration_above60
>=
25
,
"dhb_overview_ntdun_call_duration_above60"
]
=
25
df
.
loc
[
df
.
dhb_last_30_and_60_days_ntdun_call_total_duration
>=
800
,
"dhb_last_30_and_60_days_ntdun_call_total_duration"
]
=
800
df
.
loc
[
df
.
dhb_last_30_and_60_days_dun_call_in_duration
>=
1600
,
"dhb_last_30_and_60_days_dun_call_in_duration"
]
=
1600
df
.
loc
[
df
.
dhb_last_30_days_ntdun_call_total_duration
>=
2500
,
"dhb_last_30_days_ntdun_call_total_duration"
]
=
2500
df
.
loc
[
df
.
dhb_last_30_days_ntdun_call_tel_total_nums
>=
25
,
"dhb_last_30_days_ntdun_call_tel_total_nums"
]
=
25
df
.
loc
[
df
.
dhb_last_30_days_dun_call_in_duration
>=
1000
,
"dhb_last_30_days_dun_call_in_duration"
]
=
1000
df
.
loc
[
df
.
dhb_overview_ntdun_call_total_duration
>=
3000
,
"dhb_overview_ntdun_call_total_duration"
]
=
3000
df
.
loc
[
df
.
dhb_overview_ntdun_call_in_times
>=
25
,
"dhb_overview_ntdun_call_in_times"
]
=
25
df
.
loc
[
df
.
dhb_last_60_and_90_days_ntdun_call_in_duration
>=
1000
,
"dhb_last_60_and_90_days_ntdun_call_in_duration"
]
=
1000
df
.
loc
[
df
.
dhb_overview_dun_call_tel_total_nums
>=
22
,
"dhb_overview_dun_call_tel_total_nums"
]
=
22
df
.
loc
[
df
.
dhb_last_30_days_dun_call_total_duration
>=
1100
,
"dhb_last_30_days_dun_call_total_duration"
]
=
1100
df
.
loc
[
df
.
dhb_last_two_weeks_ntdun_call_in_duration
>=
300
,
"dhb_last_two_weeks_ntdun_call_in_duration"
]
=
300
return
df
if
__name__
==
'__main__'
:
#
if __name__ == '__main__':
features
=
get_feature_by_version
(
)
# features=sample.get_feature_by_version(feature_file_name
)
#
features=features[1:10]
#
features=features[1:10]
save_features
(
features
)
# sample.save_features(features,feature_file_name
)
data/samples/features/dhb.csv
View file @
b980367e
feature version
feature,version
dhb_last_30_and_60_days_dun_call_avg_duration 1
dhb_last_30_and_60_days_dun_call_avg_duration,1
dhb_last_30_and_60_days_dun_call_duration_above60 1
dhb_last_30_and_60_days_dun_call_duration_above60,1
dhb_last_30_and_60_days_dun_call_duration_below15 1
dhb_last_30_and_60_days_dun_call_duration_below15,1
dhb_last_30_and_60_days_dun_call_duration_between15_and_30 1
dhb_last_30_and_60_days_dun_call_duration_between15_and_30,1
dhb_last_30_and_60_days_dun_call_in_duration 1
dhb_last_30_and_60_days_dun_call_in_duration,1
dhb_last_30_and_60_days_dun_call_in_times 1
dhb_last_30_and_60_days_dun_call_in_times,1
dhb_last_30_and_60_days_dun_call_out_duration 1
dhb_last_30_and_60_days_dun_call_out_duration,1
dhb_last_30_and_60_days_dun_call_out_times 1
dhb_last_30_and_60_days_dun_call_out_times,1
dhb_last_30_and_60_days_dun_call_tel_total_nums 1
dhb_last_30_and_60_days_dun_call_tel_total_nums,1
dhb_last_30_and_60_days_dun_call_total_duration 1
dhb_last_30_and_60_days_dun_call_total_duration,1
dhb_last_30_and_60_days_dun_call_total_times 1
dhb_last_30_and_60_days_dun_call_total_times,1
dhb_last_30_and_60_days_ntdun_call_avg_duration 1
dhb_last_30_and_60_days_ntdun_call_avg_duration,1
dhb_last_30_and_60_days_ntdun_call_duration_above60 1
dhb_last_30_and_60_days_ntdun_call_duration_above60,1
dhb_last_30_and_60_days_ntdun_call_duration_below15 1
dhb_last_30_and_60_days_ntdun_call_duration_below15,1
dhb_last_30_and_60_days_ntdun_call_duration_between15_and_30 1
dhb_last_30_and_60_days_ntdun_call_duration_between15_and_30,1
dhb_last_30_and_60_days_ntdun_call_in_duration 1
dhb_last_30_and_60_days_ntdun_call_in_duration,1
dhb_last_30_and_60_days_ntdun_call_in_times 1
dhb_last_30_and_60_days_ntdun_call_in_times,1
dhb_last_30_and_60_days_ntdun_call_out_duration 1
dhb_last_30_and_60_days_ntdun_call_out_duration,1
dhb_last_30_and_60_days_ntdun_call_out_times 1
dhb_last_30_and_60_days_ntdun_call_out_times,1
dhb_last_30_and_60_days_ntdun_call_tel_total_nums 1
dhb_last_30_and_60_days_ntdun_call_tel_total_nums,1
dhb_last_30_and_60_days_ntdun_call_total_duration 1
dhb_last_30_and_60_days_ntdun_call_total_duration,1
dhb_last_30_and_60_days_ntdun_call_total_times 1
dhb_last_30_and_60_days_ntdun_call_total_times,1
dhb_last_30_days_dun_call_avg_duration 1
dhb_last_30_days_dun_call_avg_duration,1
dhb_last_30_days_dun_call_duration_above60 1
dhb_last_30_days_dun_call_duration_above60,1
dhb_last_30_days_dun_call_duration_below15 1
dhb_last_30_days_dun_call_duration_below15,1
dhb_last_30_days_dun_call_duration_between15_and_30 1
dhb_last_30_days_dun_call_duration_between15_and_30,1
dhb_last_30_days_dun_call_in_duration 1
dhb_last_30_days_dun_call_in_duration,1
dhb_last_30_days_dun_call_in_times 1
dhb_last_30_days_dun_call_in_times,1
dhb_last_30_days_dun_call_out_duration 1
dhb_last_30_days_dun_call_out_duration,1
dhb_last_30_days_dun_call_out_times 1
dhb_last_30_days_dun_call_out_times,1
dhb_last_30_days_dun_call_tel_total_nums 1
dhb_last_30_days_dun_call_tel_total_nums,1
dhb_last_30_days_dun_call_total_duration 1
dhb_last_30_days_dun_call_total_duration,1
dhb_last_30_days_dun_call_total_times 1
dhb_last_30_days_dun_call_total_times,1
dhb_last_30_days_ntdun_call_avg_duration 1
dhb_last_30_days_ntdun_call_avg_duration,1
dhb_last_30_days_ntdun_call_duration_above60 1
dhb_last_30_days_ntdun_call_duration_above60,1
dhb_last_30_days_ntdun_call_duration_below15 1
dhb_last_30_days_ntdun_call_duration_below15,1
dhb_last_30_days_ntdun_call_duration_between15_and_30 1
dhb_last_30_days_ntdun_call_duration_between15_and_30,1
dhb_last_30_days_ntdun_call_in_duration 1
dhb_last_30_days_ntdun_call_in_duration,1
dhb_last_30_days_ntdun_call_in_times 1
dhb_last_30_days_ntdun_call_in_times,1
dhb_last_30_days_ntdun_call_out_duration 1
dhb_last_30_days_ntdun_call_out_duration,1
dhb_last_30_days_ntdun_call_out_times 1
dhb_last_30_days_ntdun_call_out_times,1
dhb_last_30_days_ntdun_call_tel_total_nums 1
dhb_last_30_days_ntdun_call_tel_total_nums,1
dhb_last_30_days_ntdun_call_total_duration 1
dhb_last_30_days_ntdun_call_total_duration,1
dhb_last_30_days_ntdun_call_total_times 1
dhb_last_30_days_ntdun_call_total_times,1
dhb_last_60_and_90_days_dun_call_avg_duration 1
dhb_last_60_and_90_days_dun_call_avg_duration,1
dhb_last_60_and_90_days_dun_call_duration_above60 1
dhb_last_60_and_90_days_dun_call_duration_above60,1
dhb_last_60_and_90_days_dun_call_duration_below15 1
dhb_last_60_and_90_days_dun_call_duration_below15,1
dhb_last_60_and_90_days_dun_call_duration_between15_and_30 1
dhb_last_60_and_90_days_dun_call_duration_between15_and_30,1
dhb_last_60_and_90_days_dun_call_in_duration 1
dhb_last_60_and_90_days_dun_call_in_duration,1
dhb_last_60_and_90_days_dun_call_in_times 1
dhb_last_60_and_90_days_dun_call_in_times,1
dhb_last_60_and_90_days_dun_call_out_duration 1
dhb_last_60_and_90_days_dun_call_out_duration,1
dhb_last_60_and_90_days_dun_call_out_times 1
dhb_last_60_and_90_days_dun_call_out_times,1
dhb_last_60_and_90_days_dun_call_tel_total_nums 1
dhb_last_60_and_90_days_dun_call_tel_total_nums,1
dhb_last_60_and_90_days_dun_call_total_duration 1
dhb_last_60_and_90_days_dun_call_total_duration,1
dhb_last_60_and_90_days_dun_call_total_times 1
dhb_last_60_and_90_days_dun_call_total_times,1
dhb_last_60_and_90_days_ntdun_call_avg_duration 1
dhb_last_60_and_90_days_ntdun_call_avg_duration,1
dhb_last_60_and_90_days_ntdun_call_duration_above60 1
dhb_last_60_and_90_days_ntdun_call_duration_above60,1
dhb_last_60_and_90_days_ntdun_call_duration_below15 1
dhb_last_60_and_90_days_ntdun_call_duration_below15,1
dhb_last_60_and_90_days_ntdun_call_duration_between15_and_30 1
dhb_last_60_and_90_days_ntdun_call_duration_between15_and_30,1
dhb_last_60_and_90_days_ntdun_call_in_duration 1
dhb_last_60_and_90_days_ntdun_call_in_duration,1
dhb_last_60_and_90_days_ntdun_call_in_times 1
dhb_last_60_and_90_days_ntdun_call_in_times,1
dhb_last_60_and_90_days_ntdun_call_out_duration 1
dhb_last_60_and_90_days_ntdun_call_out_duration,1
dhb_last_60_and_90_days_ntdun_call_out_times 1
dhb_last_60_and_90_days_ntdun_call_out_times,1
dhb_last_60_and_90_days_ntdun_call_tel_total_nums 1
dhb_last_60_and_90_days_ntdun_call_tel_total_nums,1
dhb_last_60_and_90_days_ntdun_call_total_duration 1
dhb_last_60_and_90_days_ntdun_call_total_duration,1
dhb_last_60_and_90_days_ntdun_call_total_times 1
dhb_last_60_and_90_days_ntdun_call_total_times,1
dhb_last_three_weeks_dun_call_avg_duration 1
dhb_last_three_weeks_dun_call_avg_duration,1
dhb_last_three_weeks_dun_call_duration_above60 1
dhb_last_three_weeks_dun_call_duration_above60,1
dhb_last_three_weeks_dun_call_duration_below15 1
dhb_last_three_weeks_dun_call_duration_below15,1
dhb_last_three_weeks_dun_call_duration_between15_and_30 1
dhb_last_three_weeks_dun_call_duration_between15_and_30,1
dhb_last_three_weeks_dun_call_in_duration 1
dhb_last_three_weeks_dun_call_in_duration,1
dhb_last_three_weeks_dun_call_in_times 1
dhb_last_three_weeks_dun_call_in_times,1
dhb_last_three_weeks_dun_call_out_duration 1
dhb_last_three_weeks_dun_call_out_duration,1
dhb_last_three_weeks_dun_call_out_times 1
dhb_last_three_weeks_dun_call_out_times,1
dhb_last_three_weeks_dun_call_tel_total_nums 1
dhb_last_three_weeks_dun_call_tel_total_nums,1
dhb_last_three_weeks_dun_call_total_duration 1
dhb_last_three_weeks_dun_call_total_duration,1
dhb_last_three_weeks_dun_call_total_times 1
dhb_last_three_weeks_dun_call_total_times,1
dhb_last_three_weeks_ntdun_call_avg_duration 1
dhb_last_three_weeks_ntdun_call_avg_duration,1
dhb_last_three_weeks_ntdun_call_duration_above60 1
dhb_last_three_weeks_ntdun_call_duration_above60,1
dhb_last_three_weeks_ntdun_call_duration_below15 1
dhb_last_three_weeks_ntdun_call_duration_below15,1
dhb_last_three_weeks_ntdun_call_duration_between15_and_30 1
dhb_last_three_weeks_ntdun_call_duration_between15_and_30,1
dhb_last_three_weeks_ntdun_call_in_duration 1
dhb_last_three_weeks_ntdun_call_in_duration,1
dhb_last_three_weeks_ntdun_call_in_times 1
dhb_last_three_weeks_ntdun_call_in_times,1
dhb_last_three_weeks_ntdun_call_out_duration 1
dhb_last_three_weeks_ntdun_call_out_duration,1
dhb_last_three_weeks_ntdun_call_out_times 1
dhb_last_three_weeks_ntdun_call_out_times,1
dhb_last_three_weeks_ntdun_call_tel_total_nums 1
dhb_last_three_weeks_ntdun_call_tel_total_nums,1
dhb_last_three_weeks_ntdun_call_total_duration 1
dhb_last_three_weeks_ntdun_call_total_duration,1
dhb_last_three_weeks_ntdun_call_total_times 1
dhb_last_three_weeks_ntdun_call_total_times,1
dhb_last_two_weeks_dun_call_avg_duration 1
dhb_last_two_weeks_dun_call_avg_duration,1
dhb_last_two_weeks_dun_call_duration_above60 1
dhb_last_two_weeks_dun_call_duration_above60,1
dhb_last_two_weeks_dun_call_duration_below15 1
dhb_last_two_weeks_dun_call_duration_below15,1
dhb_last_two_weeks_dun_call_duration_between15_and_30 1
dhb_last_two_weeks_dun_call_duration_between15_and_30,1
dhb_last_two_weeks_dun_call_in_duration 1
dhb_last_two_weeks_dun_call_in_duration,1
dhb_last_two_weeks_dun_call_in_times 1
dhb_last_two_weeks_dun_call_in_times,1
dhb_last_two_weeks_dun_call_out_duration 1
dhb_last_two_weeks_dun_call_out_duration,1
dhb_last_two_weeks_dun_call_out_times 1
dhb_last_two_weeks_dun_call_out_times,1
dhb_last_two_weeks_dun_call_tel_total_nums 1
dhb_last_two_weeks_dun_call_tel_total_nums,1
dhb_last_two_weeks_dun_call_total_duration 1
dhb_last_two_weeks_dun_call_total_duration,1
dhb_last_two_weeks_dun_call_total_times 1
dhb_last_two_weeks_dun_call_total_times,1
dhb_last_two_weeks_ntdun_call_avg_duration 1
dhb_last_two_weeks_ntdun_call_avg_duration,1
dhb_last_two_weeks_ntdun_call_duration_above60 1
dhb_last_two_weeks_ntdun_call_duration_above60,1
dhb_last_two_weeks_ntdun_call_duration_below15 1
dhb_last_two_weeks_ntdun_call_duration_below15,1
dhb_last_two_weeks_ntdun_call_duration_between15_and_30 1
dhb_last_two_weeks_ntdun_call_duration_between15_and_30,1
dhb_last_two_weeks_ntdun_call_in_duration 1
dhb_last_two_weeks_ntdun_call_in_duration,1
dhb_last_two_weeks_ntdun_call_in_times 1
dhb_last_two_weeks_ntdun_call_in_times,1
dhb_last_two_weeks_ntdun_call_out_duration 1
dhb_last_two_weeks_ntdun_call_out_duration,1
dhb_last_two_weeks_ntdun_call_out_times 1
dhb_last_two_weeks_ntdun_call_out_times,1
dhb_last_two_weeks_ntdun_call_tel_total_nums 1
dhb_last_two_weeks_ntdun_call_tel_total_nums,1
dhb_last_two_weeks_ntdun_call_total_duration 1
dhb_last_two_weeks_ntdun_call_total_duration,1
dhb_last_two_weeks_ntdun_call_total_times 1
dhb_last_two_weeks_ntdun_call_total_times,1
dhb_last_week_dun_call_avg_duration 1
dhb_last_week_dun_call_avg_duration,1
dhb_last_week_dun_call_duration_above60 1
dhb_last_week_dun_call_duration_above60,1
dhb_last_week_dun_call_duration_below15 1
dhb_last_week_dun_call_duration_below15,1
dhb_last_week_dun_call_duration_between15_and_30 1
dhb_last_week_dun_call_duration_between15_and_30,1
dhb_last_week_dun_call_in_duration 1
dhb_last_week_dun_call_in_duration,1
dhb_last_week_dun_call_in_times 1
dhb_last_week_dun_call_in_times,1
dhb_last_week_dun_call_out_duration 1
dhb_last_week_dun_call_out_duration,1
dhb_last_week_dun_call_out_times 1
dhb_last_week_dun_call_out_times,1
dhb_last_week_dun_call_tel_total_nums 1
dhb_last_week_dun_call_tel_total_nums,1
dhb_last_week_dun_call_total_duration 1
dhb_last_week_dun_call_total_duration,1
dhb_last_week_dun_call_total_times 1
dhb_last_week_dun_call_total_times,1
dhb_last_week_ntdun_call_avg_duration 1
dhb_last_week_ntdun_call_avg_duration,1
dhb_last_week_ntdun_call_duration_above60 1
dhb_last_week_ntdun_call_duration_above60,1
dhb_last_week_ntdun_call_duration_below15 1
dhb_last_week_ntdun_call_duration_below15,1
dhb_last_week_ntdun_call_duration_between15_and_30 1
dhb_last_week_ntdun_call_duration_between15_and_30,1
dhb_last_week_ntdun_call_in_duration 1
dhb_last_week_ntdun_call_in_duration,1
dhb_last_week_ntdun_call_in_times 1
dhb_last_week_ntdun_call_in_times,1
dhb_last_week_ntdun_call_out_duration 1
dhb_last_week_ntdun_call_out_duration,1
dhb_last_week_ntdun_call_out_times 1
dhb_last_week_ntdun_call_out_times,1
dhb_last_week_ntdun_call_tel_total_nums 1
dhb_last_week_ntdun_call_tel_total_nums,1
dhb_last_week_ntdun_call_total_duration 1
dhb_last_week_ntdun_call_total_duration,1
dhb_last_week_ntdun_call_total_times 1
dhb_last_week_ntdun_call_total_times,1
dhb_overview_dun_call_avg_duration 1
dhb_overview_dun_call_avg_duration,1
dhb_overview_dun_call_duration_above60 1
dhb_overview_dun_call_duration_above60,1
dhb_overview_dun_call_duration_below15 1
dhb_overview_dun_call_duration_below15,1
dhb_overview_dun_call_duration_between15_and_30 1
dhb_overview_dun_call_duration_between15_and_30,1
dhb_overview_dun_call_in_duration 1
dhb_overview_dun_call_in_duration,1
dhb_overview_dun_call_in_times 1
dhb_overview_dun_call_in_times,1
dhb_overview_dun_call_out_duration 1
dhb_overview_dun_call_out_duration,1
dhb_overview_dun_call_out_times 1
dhb_overview_dun_call_out_times,1
dhb_overview_dun_call_tel_total_nums 1
dhb_overview_dun_call_tel_total_nums,1
dhb_overview_dun_call_total_duration 1
dhb_overview_dun_call_total_duration,1
dhb_overview_dun_call_total_times 1
dhb_overview_dun_call_total_times,1
dhb_overview_dun_first_call_time 1
dhb_overview_dun_first_call_time,1
dhb_overview_dun_last_call_time 1
dhb_overview_dun_last_call_time,1
dhb_overview_ntdun_call_avg_duration 1
dhb_overview_ntdun_call_avg_duration,1
dhb_overview_ntdun_call_duration_above60 1
dhb_overview_ntdun_call_duration_above60,1
dhb_overview_ntdun_call_duration_below15 1
dhb_overview_ntdun_call_duration_below15,1
dhb_overview_ntdun_call_duration_between15_and_30 1
dhb_overview_ntdun_call_duration_between15_and_30,1
dhb_overview_ntdun_call_in_duration 1
dhb_overview_ntdun_call_in_duration,1
dhb_overview_ntdun_call_in_times 1
dhb_overview_ntdun_call_in_times,1
dhb_overview_ntdun_call_out_duration 1
dhb_overview_ntdun_call_out_duration,1
dhb_overview_ntdun_call_out_times 1
dhb_overview_ntdun_call_out_times,1
dhb_overview_ntdun_call_tel_total_nums 1
dhb_overview_ntdun_call_tel_total_nums,1
dhb_overview_ntdun_call_total_duration 1
dhb_overview_ntdun_call_total_duration,1
dhb_overview_ntdun_call_total_times 1
dhb_overview_ntdun_call_total_times,1
dhb_overview_ntdun_first_call_time 1
dhb_overview_ntdun_first_call_time,1
dhb_overview_ntdun_last_call_time 1
dhb_overview_ntdun_last_call_time,1
\ No newline at end of file
dhb_last_30_and_60_days_dun_call_duration_above60,2
dhb_last_30_and_60_days_dun_call_duration_below15,2
dhb_last_30_and_60_days_dun_call_duration_between15_and_30,2
dhb_last_30_and_60_days_dun_call_in_duration,2
dhb_last_30_and_60_days_dun_call_in_times,2
dhb_last_30_and_60_days_dun_call_out_duration,2
dhb_last_30_and_60_days_dun_call_out_times,2
dhb_last_30_and_60_days_dun_call_tel_total_nums,2
dhb_last_30_and_60_days_dun_call_total_duration,2
data/samples/sample.py
0 → 100644
View file @
b980367e
import
pandas
as
pd
import
datetime
from
dateutil.relativedelta
import
relativedelta
import
os
'''
读取特征文件
'''
def get_features_from_file(feature_file_name):
    """Load the feature registry stored in a CSV file.

    :param feature_file_name: path of the CSV file to read.
    :return: the DataFrame produced by ``pd.read_csv`` with default options;
        the other helpers in this module read its ``feature`` and
        ``version`` columns.
    """
    return pd.read_csv(feature_file_name)
def get_feature_by_version(feature_file_name, version=None):
    """Return the feature names registered under one version.

    :param feature_file_name: path of the feature CSV; it must provide the
        ``feature`` and ``version`` columns.
    :param version: version number to select. ``None`` or any value below 1
        selects the highest version present in the file.
    :return: list of feature names of the selected version (empty when that
        version does not occur in the file).
    """
    df_feature = get_features_from_file(feature_file_name)
    # Identity test for None: ``== None`` goes through ``__eq__`` and is not
    # reliable for objects that overload comparison.
    if version is None or version < 1:
        version = df_feature.version.max()
    return df_feature[df_feature.version == version].feature.tolist()
def save_features(features, feature_file_name):
    """Register ``features`` as a new version unless it matches the latest one.

    The given names are compared, as a set, with the features of the highest
    version stored in ``feature_file_name``. When both sets are equal nothing
    is written; otherwise the names are appended under the next version
    number and the whole file is rewritten as a comma-separated
    ``feature,version`` table.

    :param features: list of feature names.
    :param feature_file_name: path of the feature CSV to compare with and
        to update.
    :return: ``True`` when the file already holds this feature set as its
        latest version, ``False`` when a new version was written.
    """
    columns = ['feature', 'version']
    # Read the registry once and derive both the latest feature set and the
    # next version number from the same snapshot.
    df_feature = get_features_from_file(feature_file_name)

    if df_feature.empty:
        # No version recorded yet: ``max()`` would be NaN, so start at 1.
        latest = []
        next_version = 1
    else:
        newest = df_feature.version.max()
        latest = df_feature[df_feature.version == newest].feature.tolist()
        next_version = newest + 1

    # Order and duplicates are irrelevant; only the set of names matters.
    if set(latest) == set(features):
        print('features are already newest,not need to save')
        return True

    tmp = pd.DataFrame(features, columns=['feature'])
    tmp['version'] = next_version
    merged = pd.concat([df_feature[columns], tmp[columns]], ignore_index=True)
    merged.to_csv(feature_file_name, index=False, encoding='utf8')
    return False
def cal_sample_date(last_sample_max_date=None, passdue_day=15):
    """Compute the date window from which samples can be extracted.

    The upper bound of the window is today shifted back by one month and by
    ``passdue_day + 5`` days. Without ``last_sample_max_date`` the window
    covers the three months before that bound; otherwise it starts at
    ``last_sample_max_date`` (capped at the bound).

    :param last_sample_max_date: latest date of the previous sample, given
        as a ``datetime.date``, a ``datetime.datetime`` or a
        ``'%Y-%m-%d'`` string; ``None`` to fall back to the three-month window.
    :param passdue_day: number of overdue days whose outcome the samples
        must already show, e.g. 15 for 15-day overdue performance.
    :return: ``(start_date, end_date)`` as ``datetime.date`` objects, the
        earliest and latest date samples can be taken from.
    """
    # The module is imported as ``import datetime``, so the class has to be
    # reached explicitly; ``datetime.date()`` without arguments raises.
    today = datetime.date.today()

    # +5 because not every loan runs on a 30-day cycle (some are 31 days,
    # etc.). Extracted samples must never be later than ``base_date``.
    base_date = today + relativedelta(days=-(passdue_day + 5), months=-1)

    if last_sample_max_date is None:
        return base_date + relativedelta(months=-3), base_date

    # Normalise the anchor to a plain date so it can be compared with
    # ``base_date``.
    if isinstance(last_sample_max_date, str):
        last_sample_max_date = datetime.datetime.strptime(
            last_sample_max_date, '%Y-%m-%d').date()
    elif isinstance(last_sample_max_date, datetime.datetime):
        last_sample_max_date = last_sample_max_date.date()

    start_date = min(last_sample_max_date, base_date)
    return start_date, base_date
def read_record():
    """Load the model-iteration records from ``record.txt``.

    The file is looked up relative to the current working directory.

    :return: the DataFrame read from the file; when the file is missing or
        holds no data at all, an empty DataFrame with the record columns.
    """
    file_name = 'record.txt'
    cols = ['model_name', 'min_date', 'max_date', 'sample_cnt',
            'train_min_date', 'train_max_date', 'train_cnt', 'train_auc',
            'test_min_date', 'test_max_date', 'test_cnt', 'test_auc',
            'update_date']

    if not os.path.exists(file_name):
        return pd.DataFrame(columns=cols)

    try:
        return pd.read_csv(file_name)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it as "no records".
        return pd.DataFrame(columns=cols)
def get_record(model_name):
    """Return every iteration record of one model, newest first.

    :param model_name: value matched against the ``model_name`` column.
    :return: the matching rows of the record table, sorted by
        ``update_date`` in descending order; row labels of the record table
        are preserved.
    """
    df = read_record()
    # Sort into a new frame: an in-place sort on the filtered slice operates
    # on a possible copy and triggers pandas' SettingWithCopyWarning.
    return df[df.model_name == model_name].sort_values(
        ['update_date'], ascending=False)
def get_last_record(model_name):
    """Return the most recent iteration record of a model.

    :param model_name: name of the model whose last record is wanted.
    :return: a DataFrame holding the first row of ``get_record(model_name)``
        (which is sorted newest first), or an empty DataFrame when the model
        has no record.
    """
    df_select = get_record(model_name)
    # ``head(1)`` of an empty frame is an empty frame, so no special case is
    # needed (the former ``shape == 0`` test compared a tuple with an int and
    # could never be true).
    return df_select.head(1)
def save_model_record(model_name, min_date=None, max_date=None, sample_cnt=None,
                      train_min_date=None, train_max_date=None, train_cnt=None,
                      train_auc=None, test_min_date=None, test_max_date=None,
                      test_cnt=None, test_auc=None):
    """Create or update today's record of a model in ``record.txt``.

    ``(model_name, update_date)`` acts as the unique key. If a row for
    ``model_name`` dated today already exists it is updated, otherwise a new
    row is created. Only the fields passed with a value other than ``None``
    are written; the remaining fields keep their stored value (or stay empty
    on a new row). The whole record table is then rewritten.

    :param model_name: name of the model the record belongs to.
    :param min_date: value for the ``min_date`` column.
    :param max_date: value for the ``max_date`` column.
    :param sample_cnt: value for the ``sample_cnt`` column.
    :param train_min_date: value for the ``train_min_date`` column.
    :param train_max_date: value for the ``train_max_date`` column.
    :param train_cnt: value for the ``train_cnt`` column.
    :param train_auc: value for the ``train_auc`` column.
    :param test_min_date: value for the ``test_min_date`` column.
    :param test_max_date: value for the ``test_max_date`` column.
    :param test_cnt: value for the ``test_cnt`` column.
    :param test_auc: value for the ``test_auc`` column.
    :return: None
    """
    cols = ['model_name', 'min_date', 'max_date', 'sample_cnt',
            'train_min_date', 'train_max_date', 'train_cnt', 'train_auc',
            'test_min_date', 'test_max_date', 'test_cnt', 'test_auc',
            'update_date']

    # Dates come back from the CSV as 'YYYY-MM-DD' strings, so the key is
    # compared and stored in that textual form.
    today = datetime.date.today().isoformat()

    df_all = read_record().reindex(columns=cols).reset_index(drop=True)

    # Rows forming today's record of this model.
    is_current = ((df_all.model_name == model_name)
                  & (df_all.update_date.astype(str) == today))
    df_record = df_all[is_current].copy()
    df_all = df_all[~is_current]

    if df_record.empty:
        # Build the new row explicitly: assigning a scalar to a column of a
        # zero-row frame leaves the frame with zero rows.
        df_record = pd.DataFrame(
            [{'model_name': model_name, 'update_date': today}], columns=cols)

    updates = [
        ('min_date', min_date),
        ('max_date', max_date),
        ('sample_cnt', sample_cnt),
        ('train_min_date', train_min_date),
        ('train_max_date', train_max_date),
        ('train_cnt', train_cnt),
        ('train_auc', train_auc),
        ('test_min_date', test_min_date),
        ('test_max_date', test_max_date),
        ('test_cnt', test_cnt),
        ('test_auc', test_auc),
    ]
    for column, value in updates:
        df_record = __update__(df_record, column, value)

    merged = pd.concat([df_all[cols], df_record[cols]], ignore_index=True)
    merged.to_csv('record.txt', index=False, encoding='utf8')
def __update__(df, name, value):
    """Set ``df[name]`` to ``value`` unless ``value`` is ``None``.

    :param df: container supporting item assignment (a DataFrame in this
        module); it is modified in place.
    :param name: key / column to assign.
    :param value: value to store; ``None`` means "leave ``df`` untouched".
    :return: the same ``df`` object that was passed in.
    """
    if value is None:
        return df
    df[name] = value
    return df
data/samples/
xyqb
data.py
→
data/samples/
yewu
data.py
View file @
b980367e
import
pandas
as
pd
import
pandas
as
pd
from
data.datasource.mysqldb
import
*
from
data.datasource.mysqldb
import
*
from
data.datasource
import
dbquery
from
data.datasource
import
dbquery
'''
'''
目的:提供业务数据,包括 order_no,loan_id,用户类型,支持策略用risk_info,渠道,申请时间,拒绝原因,首逾天数,最大逾期天数,放款期数
目的:提供业务数据,包括 order_no,loan_id,用户类型,支持策略用risk_info,渠道,申请时间,拒绝原因,首逾天数,最大逾期天数,放款期数
'''
'''
...
@@ -126,3 +127,6 @@ def query_bydate(start_date,end_date,is_loan=True):
...
@@ -126,3 +127,6 @@ def query_bydate(start_date,end_date,is_loan=True):
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment