Merge branch 'development'

4daf61d6 · 张鹏程 · 86574248 · 75935b37 · 4daf61d6 · 4daf61d6
Commit 4daf61d6 authored Dec 10, 2019 by 张鹏程
12 changed files
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
--- a/J_feature/三方数据调用.ipynb
+++ b/J_feature/三方数据调用.ipynb
--- a/J_feature/同盾特征平铺.ipynb
+++ b/J_feature/同盾特征平铺.ipynb
@@ -2352,7 +2352,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.5.5"
+   "version": "3.6.0"
  }
 },
 "nbformat": 4,

--- a/J_feature/数据一致性验证.ipynb
+++ b/J_feature/数据一致性验证.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 568,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import defaultdict\n",
+    "reDict = defaultdict(int)  # 返回错误计数，用于出测试报告图表\n",
+    "\n",
+    "def str_connect(x,y):\n",
+    "    return str(x)+'_'+str(y)\n",
+    "\n",
+    "def assert_diff(x,y,col_name,code=None):\n",
+    "    global reDict\n",
+    "    try:\n",
+    "        if isinstance(eval(x),list) and isinstance(eval(y),list):\n",
+    "            diff = list(set(a) ^ set(b))\n",
+    "            if diff:\n",
+    "                if code:\n",
+    "                    reDict['error'][code]+=1\n",
+    "                    return False\n",
+    "                else:\n",
+    "                    reDict[col_name]+=1\n",
+    "                    return col_name+' : '+str(x)+'_'+str(y) + '\\n'\n",
+    "            else:\n",
+    "                if code:\n",
+    "                    reDict['error'][code]+=0\n",
+    "                return \n",
+    "    except:\n",
+    "        pass\n",
+    "\n",
+    "    try:\n",
+    "        if abs(float(x) - float(y)) > 1/100000:\n",
+    "            if code:\n",
+    "                reDict[code]['error']+=1\n",
+    "                reDict[code]['sum']+=1\n",
+    "                return False\n",
+    "            else:\n",
+    "                reDict[col_name]+=1\n",
+    "                return col_name+' : '+str(x)+'_'+str(y) + '\\n'\n",
+    "        else:\n",
+    "            if code:\n",
+    "                reDict[code]['pass']+=1\n",
+    "                reDict[code]['sum']+=1\n",
+    "            return \n",
+    "    except Exception as e:\n",
+    "        pass\n",
+    "\n",
+    "    try:\n",
+    "        if str(x) != str(y):\n",
+    "            \n",
+    "            if code:\n",
+    "                reDict[code]['error']+=1\n",
+    "                reDict[code]['sum']+=1\n",
+    "                return False\n",
+    "            else:\n",
+    "#                 print(col_name,'---',str(x) ,'    ----    ',str(y))\n",
+    "                reDict[col_name]+=1\n",
+    "                return col_name+' : '+str(x)+'_'+str(y) + '\\n'\n",
+    "        else:\n",
+    "            if code:\n",
+    "                reDict[code]['pass']+=1\n",
+    "                reDict[code]['sum']+=1\n",
+    "            return \n",
+    "    except:\n",
+    "        pass\n",
+    "\n",
+    "def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = False,is_DelColumnsXY = True,code=None):\n",
+    "    \"\"\" 校验Dataframe columns 的 x 和 y 列\n",
+    "        not_columns : 不参与校验的列\n",
+    "        is_AssertFilter: 默认返回错误的数据，过滤正确值\n",
+    "        is_reDict: 默认不返回总结信息\n",
+    "        is_DelColumnsXY : 默认删除 columns 的 x和y\n",
+    "        code : 提取某列作为错误统计\n",
+    "    \"\"\"\n",
+    "    df.fillna(False,inplace=True)\n",
+    "    df['测试结论'] = 'False'\n",
+    "    df['测试描述'] = \"\"\n",
+    "    global reDict\n",
+    "    reDict = defaultdict(int)\n",
+    "    \n",
+    "    if len(df)>0:\n",
+    "        if code:\n",
+    "            code = df.loc[df.duplicated('code',keep='first')==False]['code'].tolist()\n",
+    "            for i in code:\n",
+    "                reDict[i]={}\n",
+    "                reDict[i]['sum'] = 0\n",
+    "                reDict[i]['error'] = 0\n",
+    "                reDict[i]['pass'] = 0\n",
+    "        for cls in columns:\n",
+    "            if cls not in ['测试结论'] + not_columns :\n",
+    "                try:\n",
+    "                    if code:\n",
+    "                        _assert = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls,x['code']),axis=1)\n",
+    "                        df['测试描述']+=_assert.fillna('').astype(str)\n",
+    "                    else:\n",
+    "                        _assert = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls),axis=1)\n",
+    "                        df['测试描述'] += _assert.fillna('').astype(str)\n",
+    "                    df[cls] = df.apply(lambda x : str_connect(x[cls+'_x'],x[cls+'_y']),axis=1)\n",
+    "                    if is_DelColumnsXY:\n",
+    "                        del df[cls+'_x']\n",
+    "                        del df[cls+'_y']\n",
+    "                except:\n",
+    "                    pass\n",
+    "        if is_AssertFilter:\n",
+    "            df = df.loc[df['测试结论']!=True]\n",
+    "        df['测试描述'] = df['测试描述'].apply(lambda x : x.strip('\\n').strip(' '))\n",
+    "        df.loc[df['测试描述'] == '','测试结论'] = True\n",
+    "        if is_reDict:\n",
+    "            return df,dict(reDict)\n",
+    "        else:return df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 569,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 570,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_excel('/Users/zhangpengcheng/Desktop/assert_1208 2.xlsx')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 571,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "columns = ['ref_id', 'channel_id', 'term_no', 'deadline', 'plan_interest', 'plan_overdue_interest', 'plan_service_fee', 'plan_guarantee_fee',\n",
+    " 'plan_other_fee', 'plan_amt', 'actual_interest', 'actual_overdue_interest', 'actual_service_fee', 'actual_guarantee_fee', 'actual_other_fee',\n",
+    " 'actual_amt', 'sub_amt', 'repay_apply_time', 'repay_type', 'remain_principal', 'remain_interest', 'remain_service_fee', 'remain_guarantee_fee',\n",
+    " 'remain_other_fee']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 574,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "df_assert,reDict = assert_columns(df,\n",
+    "                           columns,[],\n",
+    "                           is_AssertFilter=False,is_DelColumnsXY=True,is_reDict=True)\n",
+    "# ,'remain_interest_x','remain_interest_y'\n",
+    "# df_assert\n",
+    "df_assert.reset_index(drop=True,inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 566,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_assert.to_clipboard()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 575,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{}"
+      ]
+     },
+     "execution_count": 575,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "reDict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/J_feature/朝阳_assert.ipynb
+++ b/J_feature/朝阳_assert.ipynb
--- a/J_feature/特征计算 - 多订单_test.ipynb
+++ b/J_feature/特征计算 - 多订单_test.ipynb
--- a/J_feature/用户召回-王林芳.ipynb
+++ b/J_feature/用户召回-王林芳.ipynb
--- a/J_feature/紧急联系人运营商特征_test.ipynb
+++ b/J_feature/紧急联系人运营商特征_test.ipynb
--- a/J_feature_move/feature_move_同盾.ipynb
+++ b/J_feature_move/feature_move_同盾.ipynb
--- a/service/Sql_complexOrder.py
+++ b/service/Sql_complexOrder.py
@@ -43,8 +43,6 @@ def get_Loan_rollBack(df_loan,apply_time,passdueDay_Negative=False):
    df_loan['observationTime'] = apply_time
    # df_loan = df_loan.loc[(df_loan['apply_time'] < df_loan['observationTime']) & (df_loan['loan_paid_at'] < df_loan['observationTime'] )]
    df_loan['repaid_at'] = df_loan['repaid_at'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S'))
    df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00'))
    df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00'))

--- a/startup.py
+++ b/startup.py
@@ -55,7 +55,7 @@ if __name__ == "__main__":
    from tornado.options import define, options
-    define("port", default=23018, help="run on the given port ", type=int)
+    define("port", default=23010, help="run on the given port ", type=int)
    define("log_path", default='/tmp', help="log path ", type=str)
    tornado.options.parse_command_line()
    app = apps()

--- a/utils/tools.py
+++ b/utils/tools.py
@@ -285,8 +285,6 @@ def dict_generator_tongdun(indict, pre=None):
        yield indict
-def str_connect(x,y):
-    return str(x)+'_'+str(y)
 def del_diff_columns(old_columns,new_columns,df):
    """并修改名称,返回修改后的名称"""
@@ -331,7 +329,8 @@ def bytesToStr(x):
    return x
+def str_connect(x,y):
+    return str(x)+'_'+str(y)
 from collections import defaultdict
 reDict = defaultdict(int)  # 返回错误计数，用于出测试报告图表
@@ -351,7 +350,7 @@ def assert_diff(x,y,col_name,code=None):
            else:
                if code:
                    reDict['error'][code]+=0
-                return True
+                return
    except:
        pass
@@ -368,7 +367,7 @@ def assert_diff(x,y,col_name,code=None):
            if code:
                reDict[code]['pass']+=1
                reDict[code]['sum']+=1
-            return True
+            return
    except Exception as e:
        pass
@@ -380,18 +379,17 @@ def assert_diff(x,y,col_name,code=None):
                reDict[code]['sum']+=1
                return False
            else:
+                #                 print(col_name,'---',str(x) ,'    ----    ',str(y))
                reDict[col_name]+=1
                return col_name+' : '+str(x)+'_'+str(y) + '\n'
        else:
            if code:
                reDict[code]['pass']+=1
                reDict[code]['sum']+=1
-            return True
+            return
    except:
        pass
 def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = False,is_DelColumnsXY = True,code=None):
    """ 校验Dataframe columns 的 x 和 y 列
        not_columns : 不参与校验的列
@@ -399,9 +397,10 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa
        is_reDict: 默认不返回总结信息
        is_DelColumnsXY : 默认删除 columns 的 x和y
        code : 提取某列作为错误统计
    """
-    df['测试结论'] = False
+    df.fillna(False,inplace=True)
+    df['测试结论'] = 'False'
+    df['测试描述'] = ""
    global reDict
    reDict = defaultdict(int)
@@ -417,11 +416,11 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa
            if cls not in ['测试结论'] + not_columns :
                try:
                    if code:
-                        df['测试结论'] = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls,x['code']),axis=1).astype(str)
+                        _assert = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls,x['code']),axis=1)
+                        df['测试描述']+=_assert.fillna('').astype(str)
                    else:
+                        _assert = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls),axis=1)
-                        df['测试结论'] = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls),axis=1).astype(str)
+                        df['测试描述'] += _assert.fillna('').astype(str)
                    df[cls] = df.apply(lambda x : str_connect(x[cls+'_x'],x[cls+'_y']),axis=1)
                    if is_DelColumnsXY:
                        del df[cls+'_x']
@@ -429,11 +428,14 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa
                except:
                    pass
        if is_AssertFilter:
-            df = df.loc[df['assert']!=True]
+            df = df.loc[df['测试结论']!=True]
+        df['测试描述'] = df['测试描述'].apply(lambda x : x.strip('\n').strip(' '))
+        df.loc[df['测试描述'] == '','测试结论'] = True
        if is_reDict:
            return df,dict(reDict)
        else:return df
 def matching_data(df,key):
    """
    feature 特征匹配规则