Commit 4daf61d6 authored by 张鹏程

Merge branch 'development'

parents 86574248 75935b37
This diff is collapsed.
This diff is collapsed.
...@@ -2352,7 +2352,7 @@ ...@@ -2352,7 +2352,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.5.5" "version": "3.6.0"
} }
}, },
"nbformat": 4, "nbformat": 4,
......
{
"cells": [
{
"cell_type": "code",
"execution_count": 568,
"metadata": {},
"outputs": [],
"source": [
"from collections import defaultdict\n",
"reDict = defaultdict(int) # 返回错误计数,用于出测试报告图表\n",
"\n",
"def str_connect(x,y):\n",
" return str(x)+'_'+str(y)\n",
"\n",
"def assert_diff(x,y,col_name,code=None):\n",
" global reDict\n",
" try:\n",
" if isinstance(eval(x),list) and isinstance(eval(y),list):\n",
" diff = list(set(a) ^ set(b))\n",
" if diff:\n",
" if code:\n",
" reDict['error'][code]+=1\n",
" return False\n",
" else:\n",
" reDict[col_name]+=1\n",
" return col_name+' : '+str(x)+'_'+str(y) + '\\n'\n",
" else:\n",
" if code:\n",
" reDict['error'][code]+=0\n",
" return \n",
" except:\n",
" pass\n",
"\n",
" try:\n",
" if abs(float(x) - float(y)) > 1/100000:\n",
" if code:\n",
" reDict[code]['error']+=1\n",
" reDict[code]['sum']+=1\n",
" return False\n",
" else:\n",
" reDict[col_name]+=1\n",
" return col_name+' : '+str(x)+'_'+str(y) + '\\n'\n",
" else:\n",
" if code:\n",
" reDict[code]['pass']+=1\n",
" reDict[code]['sum']+=1\n",
" return \n",
" except Exception as e:\n",
" pass\n",
"\n",
" try:\n",
" if str(x) != str(y):\n",
" \n",
" if code:\n",
" reDict[code]['error']+=1\n",
" reDict[code]['sum']+=1\n",
" return False\n",
" else:\n",
"# print(col_name,'---',str(x) ,' ---- ',str(y))\n",
" reDict[col_name]+=1\n",
" return col_name+' : '+str(x)+'_'+str(y) + '\\n'\n",
" else:\n",
" if code:\n",
" reDict[code]['pass']+=1\n",
" reDict[code]['sum']+=1\n",
" return \n",
" except:\n",
" pass\n",
"\n",
"def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = False,is_DelColumnsXY = True,code=None):\n",
" \"\"\" 校验Dataframe columns 的 x 和 y 列\n",
" not_columns : 不参与校验的列\n",
" is_AssertFilter: 默认返回错误的数据,过滤正确值\n",
" is_reDict: 默认不返回总结信息\n",
" is_DelColumnsXY : 默认删除 columns 的 x和y\n",
" code : 提取某列作为错误统计\n",
" \"\"\"\n",
" df.fillna(False,inplace=True)\n",
" df['测试结论'] = 'False'\n",
" df['测试描述'] = \"\"\n",
" global reDict\n",
" reDict = defaultdict(int)\n",
" \n",
" if len(df)>0:\n",
" if code:\n",
" code = df.loc[df.duplicated('code',keep='first')==False]['code'].tolist()\n",
" for i in code:\n",
" reDict[i]={}\n",
" reDict[i]['sum'] = 0\n",
" reDict[i]['error'] = 0\n",
" reDict[i]['pass'] = 0\n",
" for cls in columns:\n",
" if cls not in ['测试结论'] + not_columns :\n",
" try:\n",
" if code:\n",
" _assert = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls,x['code']),axis=1)\n",
" df['测试描述']+=_assert.fillna('').astype(str)\n",
" else:\n",
" _assert = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls),axis=1)\n",
" df['测试描述'] += _assert.fillna('').astype(str)\n",
" df[cls] = df.apply(lambda x : str_connect(x[cls+'_x'],x[cls+'_y']),axis=1)\n",
" if is_DelColumnsXY:\n",
" del df[cls+'_x']\n",
" del df[cls+'_y']\n",
" except:\n",
" pass\n",
" if is_AssertFilter:\n",
" df = df.loc[df['测试结论']!=True]\n",
" df['测试描述'] = df['测试描述'].apply(lambda x : x.strip('\\n').strip(' '))\n",
" df.loc[df['测试描述'] == '','测试结论'] = True\n",
" if is_reDict:\n",
" return df,dict(reDict)\n",
" else:return df"
]
},
{
"cell_type": "code",
"execution_count": 569,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd "
]
},
{
"cell_type": "code",
"execution_count": 570,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_excel('/Users/zhangpengcheng/Desktop/assert_1208 2.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": 571,
"metadata": {},
"outputs": [],
"source": [
"columns = ['ref_id', 'channel_id', 'term_no', 'deadline', 'plan_interest', 'plan_overdue_interest', 'plan_service_fee', 'plan_guarantee_fee',\n",
" 'plan_other_fee', 'plan_amt', 'actual_interest', 'actual_overdue_interest', 'actual_service_fee', 'actual_guarantee_fee', 'actual_other_fee',\n",
" 'actual_amt', 'sub_amt', 'repay_apply_time', 'repay_type', 'remain_principal', 'remain_interest', 'remain_service_fee', 'remain_guarantee_fee',\n",
" 'remain_other_fee']"
]
},
{
"cell_type": "code",
"execution_count": 574,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_assert,reDict = assert_columns(df,\n",
" columns,[],\n",
" is_AssertFilter=False,is_DelColumnsXY=True,is_reDict=True)\n",
"# ,'remain_interest_x','remain_interest_y'\n",
"# df_assert\n",
"df_assert.reset_index(drop=True,inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 566,
"metadata": {},
"outputs": [],
"source": [
"df_assert.to_clipboard()"
]
},
{
"cell_type": "code",
"execution_count": 575,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{}"
]
},
"execution_count": 575,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reDict"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
...@@ -43,8 +43,6 @@ def get_Loan_rollBack(df_loan,apply_time,passdueDay_Negative=False): ...@@ -43,8 +43,6 @@ def get_Loan_rollBack(df_loan,apply_time,passdueDay_Negative=False):
df_loan['observationTime'] = apply_time df_loan['observationTime'] = apply_time
# df_loan = df_loan.loc[(df_loan['apply_time'] < df_loan['observationTime']) & (df_loan['loan_paid_at'] < df_loan['observationTime'] )] # df_loan = df_loan.loc[(df_loan['apply_time'] < df_loan['observationTime']) & (df_loan['loan_paid_at'] < df_loan['observationTime'] )]
df_loan['repaid_at'] = df_loan['repaid_at'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S')) df_loan['repaid_at'] = df_loan['repaid_at'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S'))
df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00')) df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00'))
df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00')) df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00'))
......
...@@ -55,7 +55,7 @@ if __name__ == "__main__": ...@@ -55,7 +55,7 @@ if __name__ == "__main__":
from tornado.options import define, options from tornado.options import define, options
define("port", default=23018, help="run on the given port ", type=int) define("port", default=23010, help="run on the given port ", type=int)
define("log_path", default='/tmp', help="log path ", type=str) define("log_path", default='/tmp', help="log path ", type=str)
tornado.options.parse_command_line() tornado.options.parse_command_line()
app = apps() app = apps()
......
...@@ -285,8 +285,6 @@ def dict_generator_tongdun(indict, pre=None): ...@@ -285,8 +285,6 @@ def dict_generator_tongdun(indict, pre=None):
yield indict yield indict
def str_connect(x,y):
return str(x)+'_'+str(y)
def del_diff_columns(old_columns,new_columns,df): def del_diff_columns(old_columns,new_columns,df):
"""并修改名称,返回修改后的名称""" """并修改名称,返回修改后的名称"""
...@@ -331,7 +329,8 @@ def bytesToStr(x): ...@@ -331,7 +329,8 @@ def bytesToStr(x):
return x return x
def str_connect(x,y):
return str(x)+'_'+str(y)
from collections import defaultdict from collections import defaultdict
reDict = defaultdict(int) # 返回错误计数,用于出测试报告图表 reDict = defaultdict(int) # 返回错误计数,用于出测试报告图表
...@@ -351,7 +350,7 @@ def assert_diff(x,y,col_name,code=None): ...@@ -351,7 +350,7 @@ def assert_diff(x,y,col_name,code=None):
else: else:
if code: if code:
reDict['error'][code]+=0 reDict['error'][code]+=0
return True return
except: except:
pass pass
...@@ -368,7 +367,7 @@ def assert_diff(x,y,col_name,code=None): ...@@ -368,7 +367,7 @@ def assert_diff(x,y,col_name,code=None):
if code: if code:
reDict[code]['pass']+=1 reDict[code]['pass']+=1
reDict[code]['sum']+=1 reDict[code]['sum']+=1
return True return
except Exception as e: except Exception as e:
pass pass
...@@ -380,18 +379,17 @@ def assert_diff(x,y,col_name,code=None): ...@@ -380,18 +379,17 @@ def assert_diff(x,y,col_name,code=None):
reDict[code]['sum']+=1 reDict[code]['sum']+=1
return False return False
else: else:
# print(col_name,'---',str(x) ,' ---- ',str(y))
reDict[col_name]+=1 reDict[col_name]+=1
return col_name+' : '+str(x)+'_'+str(y) + '\n' return col_name+' : '+str(x)+'_'+str(y) + '\n'
else: else:
if code: if code:
reDict[code]['pass']+=1 reDict[code]['pass']+=1
reDict[code]['sum']+=1 reDict[code]['sum']+=1
return True return
except: except:
pass pass
def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = False,is_DelColumnsXY = True,code=None): def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = False,is_DelColumnsXY = True,code=None):
""" 校验Dataframe columns 的 x 和 y 列 """ 校验Dataframe columns 的 x 和 y 列
not_columns : 不参与校验的列 not_columns : 不参与校验的列
...@@ -399,9 +397,10 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa ...@@ -399,9 +397,10 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa
is_reDict: 默认不返回总结信息 is_reDict: 默认不返回总结信息
is_DelColumnsXY : 默认删除 columns 的 x和y is_DelColumnsXY : 默认删除 columns 的 x和y
code : 提取某列作为错误统计 code : 提取某列作为错误统计
""" """
df['测试结论'] = False df.fillna(False,inplace=True)
df['测试结论'] = 'False'
df['测试描述'] = ""
global reDict global reDict
reDict = defaultdict(int) reDict = defaultdict(int)
...@@ -417,11 +416,11 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa ...@@ -417,11 +416,11 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa
if cls not in ['测试结论'] + not_columns : if cls not in ['测试结论'] + not_columns :
try: try:
if code: if code:
df['测试结论'] = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls,x['code']),axis=1).astype(str) _assert = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls,x['code']),axis=1)
df['测试描述']+=_assert.fillna('').astype(str)
else: else:
_assert = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls),axis=1)
df['测试结论'] = df.apply(lambda x : assert_diff(x[cls+'_x'],x[cls+'_y'],cls),axis=1).astype(str) df['测试描述'] += _assert.fillna('').astype(str)
df[cls] = df.apply(lambda x : str_connect(x[cls+'_x'],x[cls+'_y']),axis=1) df[cls] = df.apply(lambda x : str_connect(x[cls+'_x'],x[cls+'_y']),axis=1)
if is_DelColumnsXY: if is_DelColumnsXY:
del df[cls+'_x'] del df[cls+'_x']
...@@ -429,11 +428,14 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa ...@@ -429,11 +428,14 @@ def assert_columns(df,columns,not_columns=[],is_AssertFilter=True,is_reDict = Fa
except: except:
pass pass
if is_AssertFilter: if is_AssertFilter:
df = df.loc[df['assert']!=True] df = df.loc[df['测试结论']!=True]
df['测试描述'] = df['测试描述'].apply(lambda x : x.strip('\n').strip(' '))
df.loc[df['测试描述'] == '','测试结论'] = True
if is_reDict: if is_reDict:
return df,dict(reDict) return df,dict(reDict)
else:return df else:return df
def matching_data(df,key): def matching_data(df,key):
""" """
feature 特征匹配规则 feature 特征匹配规则
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment