Commit a33add70 authored by linfang.wang

大数据查询

parent 45721de0
......@@ -47,7 +47,7 @@ if __name__ == '__main__':
# 'third_data_source#xy_pan_newqueryAorgAcount',
# 'third_data_source#xy_pan_newqueryAsumAcount'
# ]
dhb = dhb.dhb()
dhb = dhb.dhb(start_time_period='2019-01-19 11:00:00',end_time_period='2019-01-20 12:00:00')
df=dhb.dhb_features_extract()
label='target'
# df=pd.read_csv('test.csv')
......
......@@ -236,8 +236,19 @@ class dhb:
#print(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period))
# use risk_analysis to extract data
dhb_loan = pd.read_sql(self.sql.replace('@start_time_period',self.start_time_period).replace('@end_time_period',self.end_time_period),mysqldb.engine_risk_analysis)
dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]] = dhb_loan[["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]].applymap(lambda x : value_map[x])
print('-----get dhb features from risk_analysis---',datetime.datetime.now())
cnt=self.cnt_samples()
print('-----samples number is %d ' % cnt['cnt'][0])
res = []
tmp=pd.read_sql(
self.sql.replace('@start_time_period', self.start_time_period).replace('@end_time_period',
self.end_time_period),
mysqldb.engine_risk_analysis, chunksize=10000)
for tt in tmp:
res.append(tt)
dhb_loan=pd.concat(res)
cols=["dhb_overview_dun_first_call_time", "dhb_overview_dun_last_call_time","dhb_overview_ntdun_first_call_time", "dhb_overview_ntdun_last_call_time"]
dhb_loan[cols] = dhb_loan[cols].applymap(lambda x : value_map[x])
dhb_loan.loc[dhb_loan.dhb_last_60_and_90_days_ntdun_call_avg_duration >= 42,"dhb_last_60_and_90_days_ntdun_call_avg_duration"] = 42
dhb_loan.loc[dhb_loan.dhb_overview_ntdun_call_duration_above60 >= 25,"dhb_overview_ntdun_call_duration_above60"] = 25
......@@ -280,6 +291,16 @@ class dhb:
# df['bins'] = df.qcut(df['target'], q = 10, percision = 6, dupulicates='drop')
# df.groupby
# return 1
def cnt_samples(self):
    """Count the ``risk_analysis`` rows selected for the configured time window.

    The statement keeps rows whose ``applied_at`` is at or after
    ``self.start_time_period`` and strictly before ``self.end_time_period``,
    with ``transacted = 1``, ``dhb_flag = 1`` and
    ``datediff(now(), deadline) > 15``.

    Returns:
        The result of ``pd.read_sql`` for the count query, run against
        ``mysqldb.engine_risk_analysis``; the count is exposed under the
        column name ``cnt``.
    """
    # Both bounds are substituted straight into the statement text, in order.
    window = (self.start_time_period, self.end_time_period)
    query_template = '''
select count(1) as cnt
from risk_analysis
where applied_at >= '%s' and applied_at < '%s'
and transacted = 1
and dhb_flag =1
and datediff(now(),deadline) > 15
'''
    return pd.read_sql(query_template % window, mysqldb.engine_risk_analysis)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment