20200101_0

b48407c2 · 舒皓月 · 552e8ad0 · b48407c2 · b48407c2 · b48407c2
Commit b48407c2 authored Jan 15, 2020 by 舒皓月
5 changed files
--- a/AUC_time.py
+++ b/AUC_time.py
@@ -46,6 +46,8 @@ class AUCMonitor:
        self.field_name_list = self.field_info_df.field_name.tolist()
        self.field_query_list = self.field_info_df.field_query.tolist()
        self.field_query_name_dict = dict(zip(self.field_query_list, self.field_name_list))
+        self.field_is_high = self.field_info_df.is_high.tolist()
+        self.field_query_is_high_dict = dict(zip(self.field_query_list, self.field_is_high))
        # 一些定义的常量
        self.passdue_day = passdue_day  # 逾期天数, 默认15.
@@ -157,12 +159,12 @@ class AUCMonitor:
        # 统一时间格式.
        if repr(df_copy['applied_at'].dtype) == "dtype('O')":
            df_copy = df_copy.loc[
-                (df_copy[field].notna()) & (df_copy['applied_at'].apply(lambda x: x[:10]) <= self.date_list[-1]) & (
+                (df_copy[field].notna()) & (df_copy['applied_at'].apply(lambda x: x[:10]) <= self.date_list[-1]) &
-                        df_copy[field] > 0) & (df_copy['passdue_day'].notna())]
+                (df_copy['passdue_day'].notna())]
        else:
            df_copy = df_copy.loc[(df_copy[field].notna()) & (
-                    df_copy['applied_at'].apply(lambda x: x.strftime('%Y-%m-%d')) <= self.date_list[-1]) & (
+                    df_copy['applied_at'].apply(lambda x: x.strftime('%Y-%m-%d')) <= self.date_list[-1]) &
-                                          df_copy[field] > 0) & (df_copy['passdue_day'].notna())]
+                                  (df_copy['passdue_day'].notna())]
        # 包含各种信息的字典.
        # 如: {'全样本':
@@ -357,11 +359,22 @@ class AUCMonitor:
            except:
                return np.nan
+        # Transform the data so that a larger value means higher risk.
+        def is_high_func(x, is_high):
+            if pd.isnull(x):  # missing values stay missing (returned as NaN)
+                return np.nan
+            if is_high:  # truthy flag: the value is returned unchanged
+                return x
+            else:  # falsy flag: the value is negated to flip its ordering
+                return -x
        na_field_list = []
        for field in self.field_query_list:
            if field in self.mongo_df.columns.tolist():
                print('正在清洗%s' % self.field_query_name_dict[field])
                self.mongo_df[field] = self.mongo_df[field].apply(clean_data)
+                self.mongo_df[field] = self.mongo_df[field].apply(lambda x:
+                                                                  is_high_func(x, self.field_query_is_high_dict[field]))
            else:
                na_field_list.append(field)
        ## 去除因为一些原因未抽取到的字段.

--- a/model_score.xlsx
+++ b/model_score.xlsx
--- a/report/20200101/MM_report_20200101.docx
+++ b/report/20200101/MM_report_20200101.docx
--- a/report/20200101/MM_report_20200101.pdf
+++ b/report/20200101/MM_report_20200101.pdf
--- a/report/20200101/~$_report_20200101.docx
+++ b/report/20200101/~$_report_20200101.docx