Bug fixes (bug修正)

bd18c3b0 · 王家华 · 589bfcb3 · bd18c3b0 · bd18c3b0 · bd18c3b0
Commit bd18c3b0 authored May 17, 2019 by 王家华
16 changed files
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
--- a/graph/__pycache__/matplot.cpython-36.pyc
+++ b/graph/__pycache__/matplot.cpython-36.pyc
--- a/graph/matplot.py
+++ b/graph/matplot.py
@@ -26,7 +26,7 @@ def topN_feature_importance(model, clf, title="untitled", save_path='./mvp/plots
    model.plot_importance(clf, max_num_features=topN)
    plt.title("Feature Importances")
-    path = save_path + title + " featureImportance.png"
+    path = save_path + title + "_featureImportance.png"
    plt.savefig(path)
    plt.show()
    return path
@@ -67,13 +67,14 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
        tab_df = []
        if tab_df_list is None:
            for data in datalist:
+                data = data.fillna(-1)
                tab_df.append(
                    pd.Series(data.index).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
                tab_df.append(
                    pd.Series(data.values).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
                # validate tab_rows
                if tab_rows is None:
-                    table_rows.append('index');
+                    table_rows.append('index')
                    table_rows.append('values')
                else:
                    # tab_rows was given by
@@ -113,12 +114,11 @@ def plot_table_list(datalist, auc, datalist_description=None, title='untitled',
    if saved_path is not None:
        plt.savefig(saved_path + title + ".png")
    plt.show()
-    return 1
+    return saved_path + title + ".png"
-def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
+def plot_table_df(dataset, auc=None, title='untitled', X_label=None, y_label=None,
                  tab_df=None, plot_tab=True, saved_path=None):
-    print(tab_df)
    '''
    instructions : visualization of pivot with single dataframe
    Params :
@@ -132,7 +132,24 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
    '''
    fig, axs = plt.subplots(1, 1, figsize=(13, 9), linewidth=0.1)
-    table_rows = dataset.columns
+    if isinstance(dataset, pd.DataFrame):
+        table_rows = dataset.columns
+    else:
+        axs.plot(range(len(dataset.index)), dataset.values)
+        fig.subplots_adjust(bottom=0.2)
+        plt.grid()
+        if y_label is not None:
+            plt.ylabel(y_label)
+        if X_label is not None:
+            plt.xlabel(X_label)
+        # plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--')
+        plt.title(title)
+        if saved_path is not None:
+            plt.savefig(saved_path + title + ".png")
+        plt.show()
+        return saved_path + title + ".png"
    table_cols = pd.Series(dataset.index).astype(str).map(lambda x: x.replace(' ', '')).map(
        lambda x : x.replace('0.', '.'))
@@ -140,7 +157,10 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
    for i in range(len(table_rows)):
        x = range(len(table_cols))
        y = dataset.iloc[:, i]
-        axs.plot(x, y, label=str(table_rows[i]) + ' AUC: ' + str(auc[i]))
+        if auc != None:
+            axs.plot(x, y, label=str(table_rows[i]) + ' AUC: ' + str(auc[i]))
+        else:
+            axs.plot(x, y, label=str(table_rows[i]))
    # if table should be plot
    if plot_tab:
        if tab_df is None:
@@ -157,8 +177,8 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
                              loc='bottom')
        plt.xticks([])
    # otherwise, nothing to do here
-    the_table.auto_set_font_size(False)
+        the_table.auto_set_font_size(False)
-    the_table.set_fontsize(9)
+        the_table.set_fontsize(8)
    fig.subplots_adjust(bottom=0.2)
    plt.grid()
    if y_label is not None:
@@ -171,7 +191,7 @@ def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
    if saved_path is not None:
        plt.savefig(saved_path + title + ".png")
    plt.show()
-    return 1
+    return saved_path + title + ".png"

--- a/mvp/plots/cache/dhb_last_30_and_60_days_ntdun_call_total_duration PDP Chart.png
+++ b/mvp/plots/cache/dhb_last_30_and_60_days_ntdun_call_total_duration PDP Chart.png
--- a/mvp/plots/cache/dhb_last_30_and_60_days_ntdun_call_total_duration univar Chart.png
+++ b/mvp/plots/cache/dhb_last_30_and_60_days_ntdun_call_total_duration univar Chart.png
--- a/mvp/plots/cache/dhb_last_30_and_60_days_ntdun_call_total_times PDP Chart.png
+++ b/mvp/plots/cache/dhb_last_30_and_60_days_ntdun_call_total_times PDP Chart.png
--- a/mvp/plots/cache/dhb_last_30_and_60_days_ntdun_call_total_times univar Chart.png
+++ b/mvp/plots/cache/dhb_last_30_and_60_days_ntdun_call_total_times univar Chart.png
--- a/mvp/plots/cache/dhb_last_30_days_dun_call_avg_duration PDP Chart.png
+++ b/mvp/plots/cache/dhb_last_30_days_dun_call_avg_duration PDP Chart.png
--- a/mvp/plots/cache/dhb_last_30_days_dun_call_avg_duration lift Chart.png
+++ b/mvp/plots/cache/dhb_last_30_days_dun_call_avg_duration lift Chart.png
--- a/mvp/plots/cache/dhb_last_30_days_dun_call_avg_duration univar Chart.png
+++ b/mvp/plots/cache/dhb_last_30_days_dun_call_avg_duration univar Chart.png
--- a/mvp/plots/cache/untitled.png
+++ b/mvp/plots/cache/untitled.png
--- a/mvp/plots/cache/内部全量客群 lift Chart.png
+++ b/mvp/plots/cache/内部全量客群 lift Chart.png
--- a/mvp/plots/cache/内部复申 lift Chart.png
+++ b/mvp/plots/cache/内部复申 lift Chart.png
--- a/mvp/plots/cache/内部首申 lift Chart.png
+++ b/mvp/plots/cache/内部首申 lift Chart.png
--- a/mvp/plots/cache/内部首贷 lift Chart.png
+++ b/mvp/plots/cache/内部首贷 lift Chart.png
--- a/mvp/refit.py
+++ b/mvp/refit.py
@@ -22,8 +22,6 @@ score = 'score'
 prediction = 'predict'
 ############################
 # 备份df_sample
 #df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx")
@@ -61,15 +59,24 @@ predictions ,test_auc = lightgbm.predict(lgbm,df_test,dhb.features,target)
 # 把新的预测结果加入test
 df_test[prediction] = predictions
 ####### allocator cache ############
 applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'}
 applied_type = {'1,2':'首贷','1,2,3':'全量客群','1':'首申','2':'复申','3':'复贷'}
 ####################################
 ### report
+import os
+os.chdir("E:/bla/model_mvp/")
 # plot feature importance
-topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/', topN=20)
+topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/cache/', topN=20)
+importanct_feat = pd.DataFrame({
+        'column': dhb.features,
+        'importance': lgbm.feature_importance(),
+    }).sort_values(by='importance',ascending=False).column.tolist()[:3]
 # report file
 report_path = "E:/bla/model_mvp/"
@@ -91,21 +98,23 @@ document.add_picture(topnfeat_path)
 document.add_paragraph('univar_chart')
 # 遍历目标features画出univarchart
-for i in dhb.features[:3]:
+for i in importanct_feat:
    univar_train = datacal.cal_univar(df_train, i, target, qcut=10)
    univar_val = datacal.cal_univar(df_val, i, target, qcut=10)
    univar_test = datacal.cal_univar(df_test, i, target, qcut=10)
+    tab_df_list = [univar_train,univar_val,univar_test]
    univarChart = matplot.plot_table_list([univar_train,univar_val,univar_test], [1,2,3], datalist_description=None, title= i +' univar Chart', X_label=None, y_label=None,
-                    tab_df_list=None, plot_tab=True,
+                    tab_df_list=tab_df_list, plot_tab=False,
                    saved_path='./mvp/plots/cache/')
-    document.add_picture('./mvp/plots/cache/' + i +' univar Chart' + ".png")
+    document.add_picture(univarChart)
 document.add_paragraph('PDP_chart')
 # 遍历目标features 画出对应PDP
-for i in dhb.features[:3]:
+for i in importanct_feat:
    pdp = datacal.cal_pdp(df=df_test, score=prediction, feature=i, qcut=10)
-    pdpChart = matplot.plot_table(pdp,title= i +' PDP Chart',saved_path='./mvp/plots/cache/')
+    pdpChart = matplot.plot_table_df(pdp, ['1'], title=i + ' PDP Chart', X_label=None, y_label=None,
-    document.add_picture('./mvp/plots/cache/' + i +' PDP Chart' + ".png")
+                  tab_df=None, plot_tab=True, saved_path='./mvp/plots/cache/')
+    document.add_picture(pdpChart)
@@ -113,13 +122,20 @@ filetool.saveDocument(document, report_path, report_name)
 document.add_paragraph('lift_chart')
 # 遍历给定渠道 & 客群 默认等频画出liftchart
-for channel in applied_from:
+try:
-    for type in applied_type:
+    for channel in ['333','159537','1,214,217,198']:
-        df_sliced = df_test[df_test.applied_type.map(lambda x : True if str(x) in type.split(',') else False) & df_test.applied_from.map(lambda x : True if str(x) in channel.split(',') else False)]
+        for type in ['1','2','3']:
-        lift = datacal.cal_liftchart(df_sliced,score=prediction)
+            df_sliced = df_test[df_test.applied_type.map(lambda x : True if str(x) in type.split(',') else False) & df_test.applied_from.map(lambda x : True if str(x) in channel.split(',') else False)]
-        liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache')
+            lift_pred = datacal.cal_lift(df_sliced,score=prediction)
-        document.add_picture("./mvp/plots/cache" + i + ' lift Chart.png')
+            lift_online = datacal.cal_lift(df_sliced,score=score)
+            #liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache/')
+            liftChart = matplot.plot_table_list([lift_pred, lift_online], [roc_auc_score(df_test[target],df_test[prediction]),roc_auc_score(df_test[target],df_test[score])], datalist_description=None,
+                                    title= applied_from[channel]+applied_type[type]+ ' lift Chart', X_label=None, y_label=None,
+                                    tab_df_list=tab_df_list, plot_tab=False,
+                                    saved_path='./mvp/plots/cache/')
+            document.add_picture(liftChart)
+except:
+    pass
 filetool.saveDocument(document, report_path, report_name)