对画图组件做了一些bug修正，新增了多个子图的支持

589bfcb3 · 王家华 · e511a80c · 589bfcb3 · 589bfcb3 · 589bfcb3
Commit 589bfcb3 authored May 17, 2019 by 王家华
28 changed files
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (model_mvp)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/.idea/model_mvp.iml
+++ b/.idea/model_mvp.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.6 (model_mvp)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/model_mvp.iml" filepath="$PROJECT_DIR$/.idea/model_mvp.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="c45d2e80-934e-41cc-8f01-c6d0d282db9d" name="Default Changelist" comment="">
+      <change beforePath="$PROJECT_DIR$/graph/matplot.py" beforeDir="false" afterPath="$PROJECT_DIR$/graph/matplot.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/models_kit/general_methods.py" beforeDir="false" afterPath="$PROJECT_DIR$/models_kit/general_methods.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/models_kit/lightgbm.py" beforeDir="false" afterPath="$PROJECT_DIR$/models_kit/lightgbm.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/mvp/refit.py" beforeDir="false" afterPath="$PROJECT_DIR$/mvp/refit.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/tools/datacal.py" beforeDir="false" afterPath="$PROJECT_DIR$/tools/datacal.py" afterDir="false" />
+    </list>
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileEditorManager">
+    <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/models_kit/general_methods.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="289">
+              <caret line="17" selection-start-line="17" selection-end-line="17" />
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/mvp/allocator.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state>
+              <folding>
+                <element signature="e#0#19#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/mvp/refit.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="-986">
+              <caret line="26" column="13" lean-forward="true" selection-start-line="26" selection-start-column="13" selection-end-line="26" selection-end-column="13" />
+              <folding>
+                <element signature="e#0#30#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/tools/filetool.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="-102">
+              <folding>
+                <element signature="e#0#9#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/tools/datacal.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="289">
+              <caret line="68" column="46" lean-forward="true" selection-start-line="68" selection-start-column="46" selection-end-line="68" selection-end-column="46" />
+              <folding>
+                <element signature="e#0#19#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/datasource/mongodb.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="493">
+              <caret line="29" selection-start-line="29" selection-end-line="29" />
+              <folding>
+                <element signature="e#0#14#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/models_obj/dhb_obj.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="-952">
+              <caret line="19" column="137" selection-start-line="19" selection-start-column="125" selection-end-line="19" selection-end-column="137" />
+              <folding>
+                <element signature="e#0#19#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/graph/matplot.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="452">
+              <caret line="101" column="43" lean-forward="true" selection-start-line="101" selection-start-column="43" selection-end-line="101" selection-end-column="43" />
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/models_kit/lightgbm.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="408">
+              <caret line="165" selection-start-line="165" selection-end-line="169" />
+              <folding>
+                <element signature="e#0#22#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/models_kit/xgboost.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state>
+              <folding>
+                <element signature="e#0#19#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="Git.Settings">
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/models_kit/general_methods.py" />
+        <option value="$PROJECT_DIR$/models_kit/lightgbm.py" />
+        <option value="$PROJECT_DIR$/mvp/refit.py" />
+        <option value="$PROJECT_DIR$/graph/matplot.py" />
+        <option value="$PROJECT_DIR$/tools/datacal.py" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectConfigurationFiles">
+    <option name="files">
+      <list>
+        <option value="$PROJECT_DIR$/.idea/model_mvp.iml" />
+        <option value="$PROJECT_DIR$/.idea/vcs.xml" />
+        <option value="$PROJECT_DIR$/.idea/misc.xml" />
+        <option value="$PROJECT_DIR$/.idea/modules.xml" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectFrameBounds" extendedState="6">
+    <option name="x" value="174" />
+    <option name="y" value="167" />
+    <option name="width" value="1400" />
+    <option name="height" value="831" />
+  </component>
+  <component name="ProjectView">
+    <navigator proportions="" version="1">
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="Scope" />
+      <pane id="ProjectPane">
+        <subPane>
+          <expand>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="datasource" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="feature" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="graph" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="models_kit" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="models_obj" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="mvp" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="plots" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="refit_pkls" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="model_mvp" type="462c0819:PsiDirectoryNode" />
+              <item name="tools" type="462c0819:PsiDirectoryNode" />
+            </path>
+            <path>
+              <item name="model_mvp" type="b2602c69:ProjectViewProjectNode" />
+              <item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
+            </path>
+          </expand>
+          <select />
+        </subPane>
+      </pane>
+    </panes>
+  </component>
+  <component name="PropertiesComponent">
+    <property name="ASKED_SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
+    <property name="restartRequiresConfirmation" value="false" />
+    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
+  </component>
+  <component name="RunDashboard">
+    <option name="ruleStates">
+      <list>
+        <RuleState>
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
+        </RuleState>
+        <RuleState>
+          <option name="name" value="StatusDashboardGroupingRule" />
+        </RuleState>
+      </list>
+    </option>
+  </component>
+  <component name="RunManager" selected="Python.lightgbm">
+    <configuration name="lightgbm" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <module name="model_mvp" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/models_kit" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/models_kit/lightgbm.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <configuration name="refit" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <module name="model_mvp" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/mvp" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/mvp/refit.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <recent_temporary>
+      <list>
+        <item itemvalue="Python.lightgbm" />
+        <item itemvalue="Python.refit" />
+      </list>
+    </recent_temporary>
+  </component>
+  <component name="SvnConfiguration">
+    <configuration />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="c45d2e80-934e-41cc-8f01-c6d0d282db9d" name="Default Changelist" comment="" />
+      <created>1557804124990</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1557804124990</updated>
+    </task>
+    <servers />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="-8" y="-8" width="1936" height="1066" extended-state="6" />
+    <layout>
+      <window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.17492098" />
+      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
+      <window_info id="Favorites" order="2" side_tool="true" />
+      <window_info anchor="bottom" id="Message" order="0" />
+      <window_info anchor="bottom" id="Find" order="1" />
+      <window_info anchor="bottom" id="Run" order="2" weight="0.3290461" />
+      <window_info anchor="bottom" id="Debug" order="3" weight="0.39978564" />
+      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
+      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
+      <window_info anchor="bottom" id="TODO" order="6" />
+      <window_info anchor="bottom" id="Version Control" order="7" />
+      <window_info anchor="bottom" id="Terminal" order="8" weight="0.3290461" />
+      <window_info anchor="bottom" id="Event Log" order="9" side_tool="true" />
+      <window_info active="true" anchor="bottom" id="Python Console" order="10" visible="true" weight="0.31511253" />
+      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
+      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
+      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
+    </layout>
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://C:/ProgramData/Anaconda3/Lib/site-packages/matplotlib/backends/qt_compat.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="729">
+          <caret line="203" selection-start-line="203" selection-end-line="203" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/models_kit/general_methods.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="289">
+          <caret line="17" selection-start-line="17" selection-end-line="17" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/mvp/allocator.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state>
+          <folding>
+            <element signature="e#0#19#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$APPLICATION_HOME_DIR$/helpers/pydev/_pydev_imps/_pydev_execfile.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="289">
+          <caret line="17" column="57" selection-start-line="17" selection-start-column="57" selection-end-line="17" selection-end-column="57" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/models_kit/xgboost.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state>
+          <folding>
+            <element signature="e#0#19#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/models_kit/lightgbm.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="408">
+          <caret line="165" selection-start-line="165" selection-end-line="169" />
+          <folding>
+            <element signature="e#0#22#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/datasource/mongodb.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="493">
+          <caret line="29" selection-start-line="29" selection-end-line="29" />
+          <folding>
+            <element signature="e#0#14#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/models_obj/dhb_obj.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="-952">
+          <caret line="19" column="137" selection-start-line="19" selection-start-column="125" selection-end-line="19" selection-end-column="137" />
+          <folding>
+            <element signature="e#0#19#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/tools/filetool.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="-102">
+          <folding>
+            <element signature="e#0#9#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/graph/matplot.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="452">
+          <caret line="101" column="43" lean-forward="true" selection-start-line="101" selection-start-column="43" selection-end-line="101" selection-end-column="43" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/tools/datacal.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="289">
+          <caret line="68" column="46" lean-forward="true" selection-start-line="68" selection-start-column="46" selection-end-line="68" selection-end-column="46" />
+          <folding>
+            <element signature="e#0#19#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/mvp/refit.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="-986">
+          <caret line="26" column="13" lean-forward="true" selection-start-line="26" selection-start-column="13" selection-end-line="26" selection-end-column="13" />
+          <folding>
+            <element signature="e#0#30#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+  </component>
+  <component name="masterDetails">
+    <states>
+      <state key="ScopeChooserConfigurable.UI">
+        <settings>
+          <splitter-proportions>
+            <option name="proportions">
+              <list>
+                <option value="0.2" />
+              </list>
+            </option>
+          </splitter-proportions>
+        </settings>
+      </state>
+    </states>
+  </component>
+</project>
\ No newline at end of file
--- a/datasource/__pycache__/__init__.cpython-36.pyc
+++ b/datasource/__pycache__/__init__.cpython-36.pyc
--- a/datasource/__pycache__/mongodb.cpython-36.pyc
+++ b/datasource/__pycache__/mongodb.cpython-36.pyc
--- a/datasource/__pycache__/mysqldb.cpython-36.pyc
+++ b/datasource/__pycache__/mysqldb.cpython-36.pyc
--- a/graph/__pycache__/__init__.cpython-36.pyc
+++ b/graph/__pycache__/__init__.cpython-36.pyc
--- a/graph/__pycache__/matplot.cpython-36.pyc
+++ b/graph/__pycache__/matplot.cpython-36.pyc
--- a/graph/matplot.py
+++ b/graph/matplot.py
@@ -4,43 +4,123 @@ Created on Thu Apr 18 11:32:06 2019
 @author: Jason Wang
 """

-
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import seaborn as sns 
+import seaborn as sns

 ############# plot config ###############
 plt.rcParams['font.sans-serif'] = ['SimHei']
 plt.rcParams['axes.unicode_minus'] = False
-plt.rcParams['savefig.dpi'] = 226 #图片像素 
-plt.rcParams['figure.dpi'] = 200 #分辨率
+plt.rcParams['savefig.dpi'] = 226  # 图片像素
+plt.rcParams['figure.dpi'] = 200  # 分辨率


-def topN_feature_importance(model, clf, title="untitled", save_path = './plots/', topN=20):
+def topN_feature_importance(model, clf, title="untitled", save_path='./mvp/plots/', topN=20):
    '''
    plot feature importance squence
    params:
        classifier
    '''
-    plt.rcParams['font.sans-serif'] = ['SimHei']
-    plt.rcParams['axes.unicode_minus'] = False
-    plt.rcParams['savefig.dpi'] = 226  # 图片像素
-    plt.rcParams['figure.dpi'] = 200  # 分辨率
    plt.figure(figsize=(10, 6))
-    model.plot_importance(clf, max_num_features = topN)
+    model.plot_importance(clf, max_num_features=topN)
    plt.title("Feature Importances")

-    path = save_path + title + "featureImportance.png"
+    path = save_path + title + " featureImportance.png"
    plt.savefig(path)
    plt.show()
    return path


-def plot_table(dataset, auc, title='untitled', X_label=None, y_label=None,
-               tab_df=None, plot_tab=True, saved_path=None):
+def plot_table_list(datalist, auc, datalist_description=None, title='untitled', X_label=None, y_label=None,
+                    tab_df_list=None, plot_tab=True,
+                    tab_rows=None, saved_path=None):
    '''
-    instructions : visualization of pivot
+        instructions : visualization of pivot with a given list of dataframes
+        Params :
+            datalist - list of tables to plot; each table needs exactly one index and one column of values
+            auc - auc list / array
+            title - title of plot ('untitled' as default)
+            X_label - X axis label of plot
+            y_label - y axis label of plot
+            plot_tab - plot table or not, default as True
+            saved_path - path prefix for the saved figure (file is saved_path + title + '.png'); None means the figure is not saved
+        '''
+    fig, axs = plt.subplots(1, 1, figsize=(13, 9), linewidth=0.1)
+
+
+
+    # datalist description
+    if datalist_description is None:
+        datalist_description = range(len(datalist))
+
+    for table_index in range(len(datalist)):
+        # 每个table需要只有一个index，一个values
+        x = range(len(datalist[table_index].index))
+        y = datalist[table_index].values
+        axs.plot(x, y, label=datalist_description[table_index])
+        if len(x) == 1:
+            plot_tab = False
+    if plot_tab:
+        table_rows = []
+        table_cols = range(len(datalist[table_index].index))
+        tab_df = []
+        if tab_df_list is None:
+            for data in datalist:
+                tab_df.append(
+                    pd.Series(data.index).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
+                tab_df.append(
+                    pd.Series(data.values).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
+                # validate tab_rows
+                if tab_rows is None:
+                    table_rows.append('index');
+                    table_rows.append('values')
+                else:
+                    # tab_rows was given by the caller
+                    table_rows = table_rows + tab_rows
+        else:
+            for data in tab_df_list:
+                tab_df.append(
+                    pd.Series(data.index).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
+                tab_df.append(
+                    pd.Series(data.values).astype(str).map(lambda x: x.replace(' ', '').replace('0.', '.')).tolist())
+                # validate tab_rows
+                if tab_rows is None:
+                    table_rows.append('index')
+                    table_rows.append('values')
+                else:
+                    # tab_rows was given by the caller
+                    table_rows = table_rows + tab_rows
+
+        the_table = plt.table(cellText=tab_df,
+                              rowLabels=table_rows,
+                              colLabels=table_cols,
+                              colWidths=[0.91 / (len(table_cols) - 1)] * len(table_cols),
+                              loc='bottom')
+        plt.xticks([])
+    # otherwise, nothing to do here
+        the_table.auto_set_font_size(False)
+        the_table.set_fontsize(8)
+    fig.subplots_adjust(bottom=0.2)
+    plt.grid()
+    if y_label is not None:
+        plt.ylabel(y_label)
+    if X_label is not None:
+        plt.xlabel(X_label)
+    plt.legend()
+    # plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--')
+    plt.title(title)
+    if saved_path is not None:
+        plt.savefig(saved_path + title + ".png")
+    plt.show()
+    return 1
+
+
+def plot_table_df(dataset, auc, title='untitled', X_label=None, y_label=None,
+                  tab_df=None, plot_tab=True, saved_path=None):
+    print(tab_df)
+    '''
+    instructions : visualization of pivot with single dataframe
    Params :
        dataset -
        auc - auc list / array
@@ -50,21 +130,27 @@ def plot_table(dataset, auc, title='untitled', X_label=None, y_label=None,
        plot_tab - plot table or not , default as True
        saved_path - saved path, set as None as there has no download needs
    '''
-    plt.rcParams['font.sans-serif'] = ['SimHei']
-    plt.rcParams['axes.unicode_minus'] = False
-    plt.rcParams['savefig.dpi'] = 226  # 图片像素
-    plt.rcParams['figure.dpi'] = 100  # 分辨率
-    fig, axs = plt.subplots(1, 1, figsize=(6, 6), linewidth=0.1)
+    fig, axs = plt.subplots(1, 1, figsize=(13, 9), linewidth=0.1)
+
    table_rows = dataset.columns
-    table_cols = dataset.index
+    table_cols = pd.Series(dataset.index).astype(str).map(lambda x: x.replace(' ', '')).map(
+        lambda x : x.replace('0.', '.'))
+
    # traverse each columns of dataframe
-    for i in table_rows:
-        x = table_cols
-        y = dataset[i]
-        axs.plot(x, y, label=str(i) + ' AUC: ' + str(auc[i]))
+    for i in range(len(table_rows)):
+        x = range(len(table_cols))
+        y = dataset.iloc[:, i]
+        axs.plot(x, y, label=str(table_rows[i]) + ' AUC: ' + str(auc[i]))
    # if table should be plot
    if plot_tab:
-        the_table = plt.table(cellText=[list(dataset.iloc[i, :].values) for i in range(len(dataset))],
+        if tab_df is None:
+
+            tab_df = [list(dataset.iloc[:, 1].values) for i in range(len(table_rows))]
+        else:
+            table_rows = tab_df.columns
+            table_cols = tab_df.index
+            tab_df = [list(tab_df.iloc[:, 1].values) for i in range(len(table_rows))]
+        the_table = plt.table(cellText=tab_df,
                              rowLabels=table_rows,
                              colLabels=table_cols,
                              colWidths=[0.91 / (len(table_cols) - 1)] * len(table_cols),
@@ -72,7 +158,7 @@ def plot_table(dataset, auc, title='untitled', X_label=None, y_label=None,
        plt.xticks([])
    # otherwise, nothing to do here
    the_table.auto_set_font_size(False)
-    the_table.set_fontsize(6)
+    the_table.set_fontsize(9)
    fig.subplots_adjust(bottom=0.2)
    plt.grid()
    if y_label is not None:
@@ -82,98 +168,99 @@ def plot_table(dataset, auc, title='untitled', X_label=None, y_label=None,
    plt.legend()
    # plt.vlines(xrange(len(cols))0],y,color='lightgrey',linestyle='--')
    plt.title(title)
+    if saved_path is not None:
+        plt.savefig(saved_path + title + ".png")
    plt.show()
    return 1



-def plot_curve_singleCurve(dataset, x_label = None, y_label = None,table_tab = None,
-                           save_path = None, figure_arrangement = 11, fig_size = (4,3),
-                           fig_title='General Plot', fig_name = 'untitled',
-                           fig_path = None):

-    
+
+
+def plot_curve_singleCurve(dataset, x_label=None, y_label=None, table_tab=None,
+                           save_path=None, figure_arrangement=11, fig_size=(4, 3),
+                           fig_title='General Plot', fig_name='untitled',
+                           fig_path=None):
    col = dataset.columns
    index = pd.Series(dataset.index.sort_values()).astype(str)
    plt.figure(figsize=fig_size)
    metric = figure_arrangement // 10 * figure_arrangement % 10
-    
+
    for i in range(int(np.ceil(len(col) // metric))):
-        
+
        cols = col[i * metric:]
        for fig_ith in range(len(cols)):
            axs = plt.subplot(figure_arrangement * 10 + 1 + fig_ith)
-            axs.plot(index,dataset.loc[cols[fig_ith]])
-            axs.set_title(cols[fig_ith],fontsize = 7)
-            plt.xticks(fontsize = 5)
-            plt.yticks(fontsize = 5)
+            axs.plot(index, dataset.loc[cols[fig_ith]])
+            axs.set_title(cols[fig_ith], fontsize=7)
+            plt.xticks(fontsize=5)
+            plt.yticks(fontsize=5)
            plt.grid()

            if x_label != None:
-                axs.set_xlabel(x_label, fontsize = 5)
-                if y_label != None:        
-                    axs.set_ylabel(y_label, fontsize = 5)
+                axs.set_xlabel(x_label, fontsize=5)
+                if y_label != None:
+                    axs.set_ylabel(y_label, fontsize=5)
        plt.tight_layout()
        plt.show()
    return 1
-    


-
-#fig,axs = plt.subplots(1,1,figsize=(16,9),linewidth=0.1)
-
+# fig,axs = plt.subplots(1,1,figsize=(16,9),linewidth=0.1)


 #
-#for fig_ith in range(len(df.columns)):
+# for fig_ith in range(len(df.columns)):
 #    axs = plt.subplot(figure_arrangement * 10 + 1 + fig_ith)
 #    axs.plot(df.index,df.iloc[fig_ith])
 #    axs.set_title(col[])
-#plt.tight_layout()
-
-def plot_curve_multiCurve(dataset, x_label = None, y_label = None,table_tab = None,
-                           save_path = None, figure_arrangement = 11, fig_size = (4,3),
-                           fig_title='General Plot', fig_name = 'untitled',
-                           fig_path = None):
+# plt.tight_layout()

+def plot_curve_multiCurve(dataset, x_label=None, y_label=None, table_tab=None,
+                          save_path=None, figure_arrangement=11, fig_size=(4, 3),
+                          fig_title='General Plot', fig_name='untitled',
+                          fig_path=None):
    col = dataset.columns
    index = pd.Series(dataset.index.sort_values()).astype(str)
    plt.figure(figsize=fig_size)
-    #metric = figure_arrangement // 10 * figure_arrangement % 10
-       
-        #cols = col[i * metric:]
+    # metric = figure_arrangement // 10 * figure_arrangement % 10
+
+    # cols = col[i * metric:]
    axs = plt.subplot(111)
-    for fig_ith in range(len(col)):            
-        axs.plot(index,dataset.loc[col[fig_ith]],label=col[fig_ith])
-    axs.set_title(col[fig_ith],fontsize = 7)
-    plt.xticks(fontsize = 5)
-    plt.yticks(fontsize = 5)
+    for fig_ith in range(len(col)):
+        axs.plot(index, dataset.loc[col[fig_ith]], label=col[fig_ith])
+    axs.set_title(col[fig_ith], fontsize=7)
+    plt.xticks(fontsize=5)
+    plt.yticks(fontsize=5)
    plt.grid()

    if x_label != None:
-        axs.set_xlabel(x_label, fontsize = 5)
-    if y_label != None:        
-        axs.set_ylabel(y_label, fontsize = 5)
+        axs.set_xlabel(x_label, fontsize=5)
+    if y_label != None:
+        axs.set_ylabel(y_label, fontsize=5)
    plt.legend()
    plt.tight_layout()
    plt.show()
    return 1
-    
+
+
 '''

 '''
+
+
 def plot_curve_mingle():
    return 1
-    
-    
-def density_chart(dataset,title):
+
+
+def density_chart(dataset, title):
    for col in dataset.columns:
-        sns.kdeplot(dataset.loc[:,col],label = col)
+        sns.kdeplot(dataset.loc[:, col], label=col)
    plt.title(title)
    plt.show()

-        
-#        
+#
 #	    alpha = 0.98 / 4 * fig_ith + 0.01
 #	    ax.set_title('%.3f' % alpha)
 #	    t1 = np.arange(0.0, 1.0, 0.01)
@@ -194,4 +281,4 @@ def density_chart(dataset,title):
 ##	for i in range(figure_arrangement%10):
 ##		plt.subplots(,figsize=fig_size,linewidth=0.1)
 #
-#	return 1
\ No newline at end of file
+#	return 1
--- a/models_kit/__pycache__/__init__.cpython-36.pyc
+++ b/models_kit/__pycache__/__init__.cpython-36.pyc
--- a/models_kit/__pycache__/lightgbm.cpython-36.pyc
+++ b/models_kit/__pycache__/lightgbm.cpython-36.pyc
--- a/models_kit/__pycache__/xgboost.cpython-36.pyc
+++ b/models_kit/__pycache__/xgboost.cpython-36.pyc
--- a/models_kit/general_methods.py
+++ b/models_kit/general_methods.py
-def topN_feature_importance(classifier, clf, topN=20, model=lgb):
+import matplotlib.pyplot as plt
+
+
+
+def topN_feature_importance(classifier, clf ,mode , topN=20):
    '''
    plot feature importance sequence
    '''
@@ -11,3 +15,4 @@ def topN_feature_importance(classifier, clf, topN=20, model=lgb):
    plt.title("Feature Importances")
    plt.show()

+
--- a/models_kit/lightgbm.py
+++ b/models_kit/lightgbm.py
@@ -44,8 +44,7 @@ def returnAUC(clf, training_set, validation_set, features, target='target'):
    return train_auc, val_auc


-def train_lgbm(params, df_train, df_val, features, adds_on=None, target='target',
-               featureImportance_path = '../mvp/plots/', topN_featureImportance=20, featureImportance_title='lightgbm'):
+def train_lgbm(params, df_train, df_val, features, adds_on=None, target='target'):
    '''
    instructions : training lightgbm model with specified params

@@ -70,8 +69,6 @@ def train_lgbm(params, df_train, df_val, features, adds_on=None, target='target'

    lgbm = lgb.train(params, lgb_train, valid_sets=lgb_val, verbose_eval=False)
    train_auc, val_auc = returnAUC(lgbm, df_train, df_val, features)
-    matplot.topN_feature_importance(lgb, lgbm, title=featureImportance_title,
-                                    save_path = featureImportance_path, topN=topN_featureImportance)
    # auc = roc_auc_score(dev['target'],gbm.predict(dev[features]))
    return train_auc, val_auc, lgbm

@@ -102,12 +99,12 @@ def lgb_params_tuning(params, features, train, val, target='target', topN=3, cv_
    print('Memory Occupancy Rate: ' + (str)(psutil.virtual_memory().percent) + '%')
    optimal_para = list(topn)

-    for deepth in np.arange(2, 7, 1):
-        for leaves in np.arange(2, 2 ** deepth, 2):
+    for deepth in np.arange(2, 4, 1):
+        for leaves in np.arange(2, 2 ** deepth, 4):
            params['max_depth'] = deepth
            params['num_leaves'] = leaves
            print("parameter combination : ", 'max_depth ', deepth, 'num_leaves ', leaves)
-            cv_result = lgb.cv(params, lgb_train, seed=7, nfold=cv_fold, verbose_eval=False)
+            cv_result = lgb.cv(params, lgb_train, seed=7, nfold=cv_fold, verbose_eval=30)
            # return max auc(best performance)
            auc_score = pd.Series(cv_result['auc-mean']).max()
            print('auc ', auc_score)
@@ -122,7 +119,7 @@ def lgb_params_tuning(params, features, train, val, target='target', topN=3, cv_
                para['max_depth'] = deepth
                para['num_leaves'] = leaves
                optimal_para[topn.argmin()] = para
-    return optimal_para, topn
+    return optimal_para, list(topn)


 #        training_curve.append(train_auc)
@@ -168,7 +165,7 @@ def lgb_params_tuning(params, features, train, val, target='target', topN=3, cv_

 def predict(lgbm,df_test,features,target='target'):
    predictions = lgbm.predict(df_test[features])
-    auc = roc_auc_score(predictions,df_test[target])
+    auc = roc_auc_score(df_test[target],predictions)
    return predictions, auc



--- a/models_obj/__pycache__/__init__.cpython-36.pyc
+++ b/models_obj/__pycache__/__init__.cpython-36.pyc
--- a/models_obj/__pycache__/dhb_obj.cpython-36.pyc
+++ b/models_obj/__pycache__/dhb_obj.cpython-36.pyc
--- a/mvp/plots/cache/dhb_last_30_and_60_days_dun_call_avg_duration univar Chart.png
+++ b/mvp/plots/cache/dhb_last_30_and_60_days_dun_call_avg_duration univar Chart.png
--- a/mvp/plots/cache/dhb_last_30_and_60_days_dun_call_duration_above60 univar Chart.png
+++ b/mvp/plots/cache/dhb_last_30_and_60_days_dun_call_duration_above60 univar Chart.png
--- a/mvp/plots/cache/dhb_last_30_and_60_days_dun_call_duration_below15 univar Chart.png
+++ b/mvp/plots/cache/dhb_last_30_and_60_days_dun_call_duration_below15 univar Chart.png
--- a/mvp/plots/cachedhb_last_30_and_60_days_dun_call_avg_duration univar Chart.png
+++ b/mvp/plots/cachedhb_last_30_and_60_days_dun_call_avg_duration univar Chart.png
--- a/mvp/plots/untitled featureImportance.png
+++ b/mvp/plots/untitled featureImportance.png
--- a/mvp/refit.py
+++ b/mvp/refit.py
@@ -6,85 +6,122 @@ from models_kit import xgboost
 import lightgbm as lgb
 from graph import matplot
 from tools import filetool
+from sklearn.metrics import roc_auc_score


 dhb = dhb_obj.dhb(features=None, sql=None, start_time_period=None, end_time_period=None,passdue_day=15)
-
 # 提取样本
 #df_sample = dhb.dhb_features_extract()


 ######### temp #############
 import pandas as pd
-df_sample = pd.read_csv('E:\\model\\model_mvp\\mvp\\dhb_loan_sample——2019-04-23.csv',engine='python')
+df_sample = pd.read_csv('E:\\model\\model_mvp\\mvp\\sample.csv',engine='python')
+target = 'target'
+score = 'score'
+prediction = 'predict'

 ############################


 # 备份df_sample
-df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx")
+#df_sample.to_csv(str(datetime.date.today())+"dhb_samples.xlsx")


 # 默认样本划分
-df_train, df_val, df_test = datacal.train_test_split_general(df_sample, val_size=0.2, test_size=0.2, stratify='target',
+df_train, df_val, df_test = datacal.train_test_split_general(df_sample, val_size=0.2, test_size=0.2, stratify=target,
                                                             random_state=7,split_methods='random',
                                                             time_label='applied_at')
 del df_sample
 # 用交叉验证获取最优参optimal_para和对应参数在CV验证集上最优AUC列表topn
-optimal_para,topn = lightgbm.lgb_params_tuning(lightgbm.params_lgb, dhb.features, df_train, df_val, target='target',
+optimal_para,topn = lightgbm.lgb_params_tuning(lightgbm.params_lgb, dhb.features, df_train, df_val, target=target,
                                               topN=3, cv_fold=5)
 print('topn 通过train交叉验证得到的auc ',topn)
-# 用新参数(optimal_para)训练模型,adds_on是需要修改的参数字典,输出feature Importance
-train_auc, val_auc, lgbm = lightgbm.train_lgbm(lightgbm.params_lgb, df_train, df_val, dhb.features,
-                                               adds_on=optimal_para, target='target')

-predictions ,test_auc = lightgbm.predict(lgbm,df_test,features=dhb.features)
-df_test['predict'] = predictions
+# model matrix
+model_matrix_index = ['name','Params','trainAUC','validationAUC']
+model_matrix = pd.DataFrame(['NULL','NULL',roc_auc_score(df_train[target],df_train[score]),roc_auc_score(df_train[target],df_train[score])],index=model_matrix_index,columns=['线上模型'])

+pointer = 0
+for param in optimal_para:
+    train_auc, val_auc, lgbm = lightgbm.train_lgbm(lightgbm.params_lgb, df_train, df_val, dhb.features,
+                                                   adds_on=param, target=target)
+    model_matrix = pd.concat([model_matrix, pd.DataFrame(['lightGBM', param, train_auc, val_auc], index=model_matrix_index, columns=[pointer])],axis=1)
+    pointer += 1

+# 简单选取一下validation set auc 最高的 params
+best_params = model_matrix.T.sort_values(by='validationAUC',ascending=False).iloc[0,:].loc['Params']
+
+# 用新参数(optimal_para)训练模型,adds_on是需要修改的参数字典,输出feature Importance
+train_auc, val_auc, lgbm = lightgbm.train_lgbm(lightgbm.params_lgb, df_train, df_val, dhb.features,
+                                               adds_on=best_params, target='target')

+# 用新模型预测结果
+predictions ,test_auc = lightgbm.predict(lgbm,df_test,dhb.features,target)
+# 把新的预测结果加入test
+df_test[prediction] = predictions

 ####### allocator cache ############
 applied_from = {'1,214,217,198': '内部', '333': '融360', '159537': '360金融'}
-applied_type = {'1,2':'首贷','1,2,3':'首付贷','1':'首申','2':'复申','3':'复贷'}
+applied_type = {'1,2':'首贷','1,2,3':'全量客群','1':'首申','2':'复申','3':'复贷'}
 ####################################

-
 ### report

 # plot feature importance
-path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./plots/', topN=20)
+topnfeat_path = matplot.topN_feature_importance(lgb, lgbm, title="untitled", save_path='./mvp/plots/', topN=20)
+
 # report file
-report_path = "E:\\bla\\"
+report_path = "E:/bla/model_mvp/"
 report_name = "lgb_report.docx"

+# 生成docx Documents
 document = filetool.buildDocument(report_path, report_name)

+# docx加入title
 document.add_heading('lightGBM 算法refit报告')

-filetool.Document.add_paragraph('特征权重图')
+# docx新增 特征权重段
+document.add_paragraph('特征权重图')
+
+# docx加入特征权重图像
+document.add_picture(topnfeat_path)

-filetool.add_picture(path)
+# 新增 univar_chart段
+document.add_paragraph('univar_chart')

-filetool.Document.add_paragraph('univar_chart')
+# 遍历目标features画出univarchart
+for i in dhb.features[:3]:
+    univar_train = datacal.cal_univar(df_train, i, target, qcut=10)
+    univar_val = datacal.cal_univar(df_val, i, target, qcut=10)
+    univar_test = datacal.cal_univar(df_test, i, target, qcut=10)
+    univarChart = matplot.plot_table_list([univar_train,univar_val,univar_test], [1,2,3], datalist_description=None, title= i +' univar Chart', X_label=None, y_label=None,
+                    tab_df_list=None, plot_tab=True,
+                    saved_path='./mvp/plots/cache/')
+    document.add_picture('./mvp/plots/cache/' + i +' univar Chart' + ".png")

-for i in dhb.features:
-    univar = datacal.cal_univar(df_train,score='raw_score')
-    univarChart = matplot.plot_table(univar,title= i +' univar Chart',saved_path='./plots/cache')
-    filetool.add_picture("./plots/cache" + i +' univar Chart')
+document.add_paragraph('PDP_chart')
+# 遍历目标features 画出对应PDP
+for i in dhb.features[:3]:
+    pdp = datacal.cal_pdp(df=df_test, score=prediction, feature=i, qcut=10)
+    pdpChart = matplot.plot_table(pdp,title= i +' PDP Chart',saved_path='./mvp/plots/cache/')
+    document.add_picture('./mvp/plots/cache/' + i +' PDP Chart' + ".png")

-for i in dhb.features:
-    pdp = datacal.cal_pdp(df_test,score='predict')
-    pdpChart = matplot.plot_table(pdp,title= i +' PDP Chart',saved_path='./plots/cache')
-    filetool.add_picture("./plots/cache" + i + ' PDP Chart')

-for i in dhb.features:
-    lift = datacal.cal_liftchart(df_test,score='predict')
-    liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./plots/cache')
-    filetool.add_picture("./plots/cache" + i + ' lift Chart')

 filetool.saveDocument(document, report_path, report_name)

+document.add_paragraph('lift_chart')
+# 遍历给定渠道 & 客群 默认等频画出liftchart
+for channel in applied_from:
+    for type in applied_type:
+        df_sliced = df_test[df_test.applied_type.map(lambda x : True if str(x) in type.split(',') else False) & df_test.applied_from.map(lambda x : True if str(x) in channel.split(',') else False)]
+        lift = datacal.cal_liftchart(df_sliced,score=prediction)
+        liftChart = matplot.plot_table(lift, title=i +' lift Chart',saved_path='./mvp/plots/cache')
+        document.add_picture("./mvp/plots/cache" + i + ' lift Chart.png')
+
+
+filetool.saveDocument(document, report_path, report_name)




--- a/tools/__pycache__/__init__.cpython-36.pyc
+++ b/tools/__pycache__/__init__.cpython-36.pyc
--- a/tools/__pycache__/datacal.cpython-36.pyc
+++ b/tools/__pycache__/datacal.cpython-36.pyc
--- a/tools/__pycache__/filetool.cpython-36.pyc
+++ b/tools/__pycache__/filetool.cpython-36.pyc
--- a/tools/datacal.py
+++ b/tools/datacal.py
@@ -4,26 +4,79 @@ import datetime
 from sklearn.model_selection import train_test_split


-
-def liftchart(df,target='target',qcut=10,retbins=True):
+def cal_lift(df_list, score, target='target', qcut=10, retbin=False):
    '''
    instructions : return liftchart dataframe with qcut & pivot 逾期率liftchart
    Params :
-        df - dataframe(注意一定是是放款集！！)
+        df_list - a dataframe, or a list of dataframes (note: must be the disbursed-loan sample set!!)
+        score - model score column
        target - label column
        qcut - quantiles
        retbin - return bins interval when 'retbin' is True, else False
    :return:
-        liftchart dataframe
+        liftchart pivot
+    '''
+    pivot = pd.DataFrame([])
+    if type(df_list) == pd.DataFrame:
+        df = df_list.copy()
+        # fill in missing values with -1
+        df.fillna(value=-1,inplace=True)
+        df = df[[score, target]]
+        # create a bins column
+        df_noneNA = [df[score] < 0]
+
+
+        df['bins'] = pd.qcut(df[score], q=qcut, precision=6, retbins=retbin, duplicates='drop')
+        pivot_tmp = df[['bins', target]].groupby('bins').agg(['mean', 'count'])
+        pivot = pd.concat([pivot, pivot_tmp], axis=1)
+    if type(df_list) == list:
+        print('none')
+        for df in df_list:
+            df = df.copy()
+            df = df[[score, target]]
+            # create a bins column
+            df['bins'] = pd.qcut(df[score], q=qcut, precision=6, retbins=retbin, duplicates='drop')
+            pivot_tmp = df[['bins', target]].groupby('bins').agg(['mean', 'count'])
+            pivot = pd.concat([pivot, pivot_tmp], axis=1)
+    return pivot[target]
+
+
+def cal_univar(df, feature, target, qcut=10):
+    '''
+    instructions : return univar pivot
+    Params:
+        :param df: dataframe with the single variable & the label target (overdue label)
+        :param feature: single feature to bin
+        :param target: label column whose mean is computed in each bin
+        :param qcut: N bins in the same frequency
+
+    :return: univar pivot
    '''
    df = df.copy()
-    # create a bins column
-    df['bins'] = pd.qcut(df, q=10, precision=6, retbins=False, duplicates='drop')
-    pivot = df[['bins','target']].groupby('bins').agg(['mean','count'])
-    return pivot
+    df = df[[feature, target]]
+    # fill missing with -1
+    df.fillna(value=-1,inplace=True)
+    df['bins'] = pd.qcut(df[feature], q=qcut, precision=6, retbins=False, duplicates='drop')
+    pivot = df[[target,'bins']].groupby('bins').sum() / df[[target,'bins']].groupby('bins').count()
+    return pivot[target]



+def cal_pdp(df, score, feature, qcut=10):
+    '''
+    instructions : return the pdp pivot, i.e. the mean model score inside each
+                   quantile bin of a single feature
+    Params :
+        df - dataframe holding both the feature column and the score column
+             (the caller's frame is not modified)
+        score - name of the column with the score predicted by the model
+        feature - name of the column to bin
+        qcut - number of quantile bins requested; duplicated bin edges are
+               dropped, so fewer bins may come back
+    :return:
+        Series indexed by bin interval, holding the mean score of each bin
+    '''
+    # Copy only the two needed columns: the caller's frame stays untouched and
+    # 'bins' is not assigned onto a slice (avoids pandas SettingWithCopyWarning).
+    df = df[[feature, score]].copy()
+    df['bins'] = pd.qcut(df[feature], q=qcut, precision=6, retbins=False, duplicates='drop')
+    # mean == sum / count per bin, computed in a single groupby pass
+    return df.groupby('bins')[score].mean()
+



@@ -184,45 +237,45 @@ def cal_accume(df,feature,target,bin=10,classes=[]):
    return df_out


-def cal_univar(df,feature,target,bin=10,classes=[]):
-    '''
-    groupby(classes) 分组,对feature 进行bin 分位，对各个分位进行 count,mean ,sum计算
-    :param df: dataframe
-    :param feature: feature in df.columns
-    :param target: in df.columns eg: count(target) mean(target)
-    :param bins:default =10
-    :param classes: 分组
-    :return:
-    '''
-    if df.shape[0]==0:
-        raise('no data')
-    columns=df.columns.tolist()
-    if target not in columns:
-        raise('not found %s' % target)
-    if feature not in columns:
-        raise('not found %s' % feature)
-
-    tmp=df.copy()
-    tmp[feature].fillna(-1, inplace=True)
-    # == bin 划分,feature 有可能 非数字
-    try:
-        tmp[feature] = tmp[feature].astype(float)
-        feature_grid = cal_feature_grid(tmp, feature, bin)
-        tmp['lbl'] = pd.cut(tmp[feature], feature_grid, include_lowest = True)
-        tmp['grid'] = tmp['lbl'].cat.codes
-    except ValueError:
-        tmp['lbl']=tmp[feature]
-        tmp['grid']=tmp[feature]
-
-    if len(classes) > 0:
-        df_gp = tmp.groupby(classes+['grid','lbl']).agg({target: ['count', 'mean','sum']}).reset_index()
-        df_gp.columns = classes+['grid','lbl', 'count', 'mean','sum']
-        df_out=df_gp
-    else:
-        df_all = tmp.groupby(['grid','lbl']).agg({target: ['count', 'mean','sum']}).reset_index()
-        df_all.columns = ['grid', 'lbl', 'count', 'mean', 'sum']
-        df_out = df_all
-    return df_out
+# def cal_univar(df,feature,target,bin=10,classes=[]):
+#     '''
+#     groupby(classes) 分组,对feature 进行bin 分位，对各个分位进行 count,mean ,sum计算
+#     :param df: dataframe
+#     :param feature: feature in df.columns
+#     :param target: in df.columns eg: count(target) mean(target)
+#     :param bins:default =10
+#     :param classes: 分组
+#     :return:
+#     '''
+#     if df.shape[0]==0:
+#         raise('no data')
+#     columns=df.columns.tolist()
+#     if target not in columns:
+#         raise('not found %s' % target)
+#     if feature not in columns:
+#         raise('not found %s' % feature)
+#
+#     tmp=df.copy()
+#     tmp[feature].fillna(-1, inplace=True)
+#     # == bin 划分,feature 有可能 非数字
+#     try:
+#         tmp[feature] = tmp[feature].astype(float)
+#         feature_grid = cal_feature_grid(tmp, feature, bin)
+#         tmp['lbl'] = pd.cut(tmp[feature], feature_grid, include_lowest = True)
+#         tmp['grid'] = tmp['lbl'].cat.codes
+#     except ValueError:
+#         tmp['lbl']=tmp[feature]
+#         tmp['grid']=tmp[feature]
+#
+#     if len(classes) > 0:
+#         df_gp = tmp.groupby(classes+['grid','lbl']).agg({target: ['count', 'mean','sum']}).reset_index()
+#         df_gp.columns = classes+['grid','lbl', 'count', 'mean','sum']
+#         df_out=df_gp
+#     else:
+#         df_all = tmp.groupby(['grid','lbl']).agg({target: ['count', 'mean','sum']}).reset_index()
+#         df_all.columns = ['grid', 'lbl', 'count', 'mean', 'sum']
+#         df_out = df_all
+#     return df_out