Commit b48407c2 authored by 舒皓月

20200101_0

parent 552e8ad0
...@@ -46,6 +46,8 @@ class AUCMonitor: ...@@ -46,6 +46,8 @@ class AUCMonitor:
self.field_name_list = self.field_info_df.field_name.tolist() self.field_name_list = self.field_info_df.field_name.tolist()
self.field_query_list = self.field_info_df.field_query.tolist() self.field_query_list = self.field_info_df.field_query.tolist()
self.field_query_name_dict = dict(zip(self.field_query_list, self.field_name_list)) self.field_query_name_dict = dict(zip(self.field_query_list, self.field_name_list))
self.field_is_high = self.field_info_df.is_high.tolist()
self.field_query_is_high_dict = dict(zip(self.field_query_list, self.field_is_high))
# 一些定义的常量 # 一些定义的常量
self.passdue_day = passdue_day # 逾期天数, 默认15. self.passdue_day = passdue_day # 逾期天数, 默认15.
...@@ -157,12 +159,12 @@ class AUCMonitor: ...@@ -157,12 +159,12 @@ class AUCMonitor:
# 统一时间格式. # 统一时间格式.
if repr(df_copy['applied_at'].dtype) == "dtype('O')": if repr(df_copy['applied_at'].dtype) == "dtype('O')":
df_copy = df_copy.loc[ df_copy = df_copy.loc[
(df_copy[field].notna()) & (df_copy['applied_at'].apply(lambda x: x[:10]) <= self.date_list[-1]) & ( (df_copy[field].notna()) & (df_copy['applied_at'].apply(lambda x: x[:10]) <= self.date_list[-1]) &
df_copy[field] > 0) & (df_copy['passdue_day'].notna())] (df_copy['passdue_day'].notna())]
else: else:
df_copy = df_copy.loc[(df_copy[field].notna()) & ( df_copy = df_copy.loc[(df_copy[field].notna()) & (
df_copy['applied_at'].apply(lambda x: x.strftime('%Y-%m-%d')) <= self.date_list[-1]) & ( df_copy['applied_at'].apply(lambda x: x.strftime('%Y-%m-%d')) <= self.date_list[-1]) &
df_copy[field] > 0) & (df_copy['passdue_day'].notna())] (df_copy['passdue_day'].notna())]
# 包含各种信息的字典. # 包含各种信息的字典.
# 如: {'全样本': # 如: {'全样本':
...@@ -357,11 +359,22 @@ class AUCMonitor: ...@@ -357,11 +359,22 @@ class AUCMonitor:
except: except:
return np.nan return np.nan
# Transform the data so that a larger value always means higher risk.
def is_high_func(x, is_high):
    """Orient a field value so that larger numbers indicate higher risk.

    :param x: the field value; may be null.
    :param is_high: truthy when a larger raw value already means higher
        risk, falsy when the value has to be negated.
    :return: NaN when ``x`` is null (as judged by ``pd.isnull``), ``x``
        unchanged when ``is_high`` is truthy, otherwise ``-x``.
    """
    if pd.isnull(x):
        return np.nan
    # Negate fields for which a larger raw value does NOT mean higher risk.
    return x if is_high else -x
na_field_list = [] na_field_list = []
for field in self.field_query_list: for field in self.field_query_list:
if field in self.mongo_df.columns.tolist(): if field in self.mongo_df.columns.tolist():
print('正在清洗%s' % self.field_query_name_dict[field]) print('正在清洗%s' % self.field_query_name_dict[field])
self.mongo_df[field] = self.mongo_df[field].apply(clean_data) self.mongo_df[field] = self.mongo_df[field].apply(clean_data)
self.mongo_df[field] = self.mongo_df[field].apply(lambda x:
is_high_func(x, self.field_query_is_high_dict[field]))
else: else:
na_field_list.append(field) na_field_list.append(field)
## 去除因为一些原因未抽取到的字段. ## 去除因为一些原因未抽取到的字段.
......
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment