Commit 548f837a authored by 张鹏程's avatar 张鹏程

特征计算-用户特征相关

parent 1e4265f7
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'lxml.etree._ElementTree'>\n",
"<class 'bytes'>\n",
"b'<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\\n<!-- saved from url=(0193)http://10.2.2.29//image/0001001/00-012/2019/20190516/10200010/8a82a7886aa1ad3e016abe1ac9de1d1b/interface/other/8a82a7886aa1ad3e016abe1ac9e11d1d//8a82a7886aa1ad3e016abe1ac9e11d1d-attachment.html --><!-- Generated by F12 developer tools. This might not be an accurate representation of the original source file --><html><head><title>&#20010;&#20154;&#20449;&#29992;&#25253;&#21578;</title>&#13;\\n<script language=\"JavaScript\" src=\"viewctrl.js\"/>&#13;\\n&#13;\\n<style type=\"text/css\"/>&#13;\\n&#13;\\n<meta name=\"GENERATOR\" content=\"Microsoft FrontPage 5.0\"/>&#13;\\n<meta content=\"text/html; charset=GBK\" http-equiv=\"Content-Type\"/><link rel=\"stylesheet\" type=\"text/css\" href=\"css/style1.css\"/>&#13;\\n<style type=\"text/css\">.tdStyle {&#13;\\n\\tBORDER-BOTTOM: 1px solid; BORDER-LEFT: 1px solid; BORDER-TOP: 1px solid; BORDER-RIGHT: 1px solid&#13;\\n}&#13;\\n</style>&#13;\\n&#13;\\n<style type=\"text/css\">.style1 {&#13;\\n\\tCOLOR: #0066cc; FONT-WEIGHT: bold&#13;\\n}&#13;\\n.style4 {&#13;\\n\\tCOLOR: #0066cc; FONT-SIZE: 14px&#13;\\n}&#13;\\nTD {&#13;\\n\\tFONT-FAMILY: &#23435;&#20307;; FONT-SIZE: 9pt&#13;\\n}&#13;\\n</style>&#13;\\n&#13;\\n<script language=\"JavaScript\">&lt;!--&#13;\\n //&#23631;&#34109;&#40736;&#26631;&#21491;&#38190;&#12289;Ctrl+N&#12289;Shift+F10&#12289;F11&#12289;F5&#21047;&#26032;&#12289;&#36864;&#26684;&#38190; &#13;\\nfunction document.oncontextmenu(){event.returnValue=false;}//&#23631;&#34109;&#40736;&#26631;&#21491;&#38190; &#13;\\nfunction window.onhelp(){return false} //&#23631;&#34109;F1&#24110;&#21161; &#13;\\nfunction document.onkeydown() &#13;\\n{ &#13;\\n if ((window.event.altKey)&amp;&amp; &#13;\\n ((window.event.keyCode==37) || //&#23631;&#34109; Alt+ &#26041;&#21521;&#38190; &#8592; &#13;\\n (window.event.keyCode==39))) //&#23631;&#34109; Alt+ &#26041;&#21521;&#38190; &#8594; 
&#13;\\n { &#13;\\n alert(\"&#19981;&#20934;&#20320;&#20351;&#29992;ALT+&#26041;&#21521;&#38190;&#21069;&#36827;&#25110;&#21518;&#36864;&#32593;&#39029;&#65281;\"); &#13;\\n event.returnValue=false; &#13;\\n } &#13;\\n /* &#27880;&#65306;&#36825;&#36824;&#19981;&#26159;&#30495;&#27491;&#22320;&#23631;&#34109; Alt+ &#26041;&#21521;&#38190;&#65292; &#13;\\n &#22240;&#20026; Alt+ &#26041;&#21521;&#38190;&#24377;&#20986;&#35686;&#21578;&#26694;&#26102;&#65292;&#25353;&#20303; Alt &#38190;&#19981;&#25918;&#65292; &#13;\\n &#29992;&#40736;&#26631;&#28857;&#25481;&#35686;&#21578;&#26694;&#65292;&#36825;&#31181;&#23631;&#34109;&#26041;&#27861;&#23601;&#22833;&#25928;&#20102;&#12290;&#20197;&#21518;&#33509; &#13;\\n &#26377;&#21738;&#20301;&#39640;&#25163;&#26377;&#30495;&#27491;&#23631;&#34109; Alt &#38190;&#30340;&#26041;&#27861;&#65292;&#35831;&#21578;&#30693;&#12290;*/ &#13;\\n &#13;\\n if ((event.keyCode==8) || //&#23631;&#34109;&#36864;&#26684;&#21024;&#38500;&#38190; &#13;\\n (event.keyCode==116) || //&#23631;&#34109; F5 &#21047;&#26032;&#38190; &#13;\\n (event.ctrlKey &amp;&amp; event.keyCode==82)){ //Ctrl + R &#13;\\n event.keyCode=0; &#13;\\n event.returnValue=false; &#13;\\n } &#13;\\n if (event.keyCode==122){event.k e y C o d e = 0 ; e v e n t . r e t u r n V a l u e = f a l s e ; } / / O\\\\=&#21276; 1 1 &#13; \\n i f ( e v e n t . c t r l K e y &amp; &amp; e v e n t . k e y C o d e = = 7 8 ) e v e n t . r e t u r n V a l u e = f a l s e ; / / O\\\\=</script></head></html>'\n"
]
}
],
"source": [
"from lxml import etree\n",
"\n",
"html=etree.parse('/Users/zhangpengcheng/Desktop/征信报告html解析/一代征信报告/13082119950823527X.htm',etree.HTMLParser()) #指定解析器HTMLParser会根据文件修复HTML文件中缺失的如声明信息\n",
"result=etree.tostring(html) #解析成字节\n",
"#result=etree.tostringlist(html) #解析成列表\n",
"print(type(html))\n",
"print(type(result))\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"[<Element html at 0x107c7f508>,\n",
" <Element head at 0x107c60f08>,\n",
" <Element title at 0x107ddd848>,\n",
" <Element script at 0x107ddd148>,\n",
" <Element style at 0x107ddd108>,\n",
" <Element meta at 0x107ddd288>,\n",
" <Element meta at 0x107ddd8c8>,\n",
" <Element link at 0x107ddd908>,\n",
" <Element style at 0x107ddd948>,\n",
" <Element style at 0x107ddd088>,\n",
" <Element script at 0x107ddd988>]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"html.xpath('//*')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "html5lib not found, please install it",
"output_type": "error",
"traceback": [
"---------------------------------------------------------------------------",
"ImportError Traceback (most recent call last)",
"<ipython-input-7-3b3fda0ebdae> in <module>()\n----> 1 pd.read_html('/Users/zhangpengcheng/Desktop/征信报告html解析/一代征信报告/13082119950823527X.htm')\n",
"/Users/zhangpengcheng/.pyenv/versions/3.6.0/lib/python3.6/site-packages/pandas/io/html.py in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, tupleize_cols, thousands, encoding, decimal, converters, na_values, keep_default_na)\n 913 thousands=thousands, attrs=attrs, encoding=encoding,\n 914 decimal=decimal, converters=converters, na_values=na_values,\n--> 915 keep_default_na=keep_default_na)\n",
"/Users/zhangpengcheng/.pyenv/versions/3.6.0/lib/python3.6/site-packages/pandas/io/html.py in _parse(flavor, io, match, attrs, encoding, **kwargs)\n 737 retained = None\n 738 for flav in flavor:\n--> 739 parser = _parser_dispatch(flav)\n 740 p = parser(io, compiled_match, attrs, encoding)\n 741 \n",
"/Users/zhangpengcheng/.pyenv/versions/3.6.0/lib/python3.6/site-packages/pandas/io/html.py in _parser_dispatch(flavor)\n 680 if flavor in ('bs4', 'html5lib'):\n 681 if not _HAS_HTML5LIB:\n--> 682 raise ImportError(\"html5lib not found, please install it\")\n 683 if not _HAS_BS4:\n 684 raise ImportError(\n",
"ImportError: html5lib not found, please install it"
]
}
],
"source": [
"pd.read_html('/Users/zhangpengcheng/Desktop/征信报告html解析/一代征信报告/13082119950823527X.htm',)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
from features.Base_Features import BaseFeatures as BF from features.Base_Features import BaseFeatures as BF
from service import Sql_complexOrder,Sql_contactsOperator from service import Sql_complexOrder,Sql_contactsOperator
from service.Sql_revealTheBottomChannel import * from service.Sql_RevealTheBottomChannel import *
import pandas as pd import pandas as pd
import datetime import datetime
class __INIT__(BF): class __INIT__(BF):
"""通讯录地址特征""" """兜底特征一期"""
def __init__(self): def __init__(self):
time_now = datetime.datetime.strptime(datetime.datetime.strftime(datetime.datetime.now(),'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S') time_now = datetime.datetime.strptime(datetime.datetime.strftime(datetime.datetime.now(),'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S')
BF._apply_at = time_now BF._apply_at = time_now
...@@ -19,7 +19,7 @@ class __INIT__(BF): ...@@ -19,7 +19,7 @@ class __INIT__(BF):
if self._df_apply_from.empty == False: if self._df_apply_from.empty == False:
BF._apply_at = datetime.datetime.strptime(BF._df_apply_from['apply_time'].astype(str).values[0],'%Y-%m-%d %H:%M:%S') BF._apply_at = datetime.datetime.strptime(BF._df_apply_from['apply_time'].astype(str).values[0],'%Y-%m-%d %H:%M:%S')
BF.df_loan_all = Sql_contactsOperator.getSql_loanAll(str(tuple([self.user_id])).replace(',)',')')) BF.df_loan_all = getSql_loanAll(str(tuple([self.user_id])).replace(',)',')'))
BF.df_loan_rollBack = pd.DataFrame() BF.df_loan_rollBack = pd.DataFrame()
if BF.df_loan_all.empty == False: if BF.df_loan_all.empty == False:
BF.df_loan_rollBack = Sql_complexOrder.get_Loan_rollBack(BF.df_loan_all,BF._apply_at) BF.df_loan_rollBack = Sql_complexOrder.get_Loan_rollBack(BF.df_loan_all,BF._apply_at)
......
...@@ -3,17 +3,19 @@ ...@@ -3,17 +3,19 @@
from features.Base_Features import BaseFeatures as BF from features.Base_Features import BaseFeatures as BF
from service import Sql_contactsOperator from service import Sql_contactsOperator
from service.Sql_complexOrder import * from service.Sql_complexOrder import *
import pandas as pd import pandas as pd
import datetime import datetime
class __INIT__(BF): class __INIT__(BF):
def __init__(self): def __init__(self):
time_now = datetime.datetime.strptime(datetime.datetime.strftime(datetime.datetime.now(),'%Y-%m-%d 00:00:00'),'%Y-%m-%d %H:%M:%S') time_now = datetime.datetime.strptime(datetime.datetime.strftime(datetime.datetime.now(),'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S')
BF.df_loan_all = Sql_contactsOperator.getSql_loanAll(str(tuple([self.user_id])).replace(',)',')')) BF.df_loan_all = Sql_contactsOperator.getSql_loanAll(str(tuple([self.user_id])).replace(',)',')'))
BF._apply_at = time_now BF._apply_at = time_now
BF._df_apply_all = Sql_contactsOperator.getSql_userApplyQuotaRecord(str(tuple([self.user_id])).replace(',)',')'))
if self.orderId: if self.orderId:
BF._df_apply_all = Sql_contactsOperator.getSql_userApplyQuotaRecord(str(tuple([self.user_id])).replace(',)',')')) # BF._df_apply_all = Sql_contactsOperator.getSql_userApplyQuotaRecord(str(tuple([self.user_id])).replace(',)',')'))
_df_apply_orderId = BF._df_apply_all.loc[BF._df_apply_all['order_no'] == self.orderId] _df_apply_orderId = BF._df_apply_all.loc[BF._df_apply_all['order_no'] == self.orderId]
if _df_apply_orderId.empty == False: if _df_apply_orderId.empty == False:
_df_apply_orderId['apply_time'] = _df_apply_orderId['apply_time'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S')) _df_apply_orderId['apply_time'] = _df_apply_orderId['apply_time'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S'))
......
...@@ -47,4 +47,36 @@ def getSql_RevealTheBottomChannel(user_id,apply_time): ...@@ -47,4 +47,36 @@ def getSql_RevealTheBottomChannel(user_id,apply_time):
order by m.loan_paid_at desc order by m.loan_paid_at desc
""".format(user_id,apply_time) """.format(user_id,apply_time)
_df = pd.read_sql(_sql,con = con_tuomin_xyqb) _df = pd.read_sql(_sql,con = con_tuomin_xyqb)
return _df
def getSql_loanAll(user_id):
    """Fetch every disbursed-loan order detail row for the given user(s).

    Per the original note, business_type 9 denotes the fallback ("兜底") channel;
    it is one of the business types kept by the filter below.

    Args:
        user_id: Text substituted verbatim into ``loan.user_id in {}``, so it
            must already be a SQL list literal such as ``"(123)"``.
            NOTE(review): the value is interpolated with ``str.format`` and is
            not passed as a bound parameter - callers must supply trusted input.

    Returns:
        Whatever ``pd.read_sql`` yields for the query (a DataFrame), with one
        row per loan / repayment-plan join match, ordered by ``loan.id`` then
        ``plan.id``. Selected columns: order_no, business_type, user_id,
        loan_id, progress, contract_loan_amount, loan_paid_at,
        monthly_interest_rate, applied_from, apply_time, plan_id, term_no,
        repayment_status, deadline, deadline_new, repaid_at,
        current_repayment, required_repayment.
    """
    # The SQL text is kept flush-left so the literal handed to the database is
    # character-for-character the same as before; only the Python around it
    # was restructured.
    loan_detail_template = """
select
IFNULL(aqr.order_no ,'') order_no,loan.business_type
,loan.user_id,loan.id as loan_id,loan.progress,fest.contract_loan_amount,fest.loan_paid_at,fest.monthly_interest_rate
,case when loan.created_at >='2018-05-08' then aqr.apply_from
when loan.created_at < '2018-05-08' then loan.created_from
else null
end as applied_from
,if (sub.created_at is not null, sub.created_at ,loan.created_at) apply_time
,plan.id as plan_id,plan.term_no, plan.repayment_status, plan.deadline as deadline,DATE_ADD(plan.deadline,INTERVAL 1 second) as deadline_new
,plan.repaid_at
,plan.current_repayment,plan.required_repayment
from loan_application_manifest_history as fest
left join loan_application_history as loan on fest.loan_application_history_id = loan.id
left join loan_submit_info as sub on sub.loan_id = loan.id
left join repay as plan on plan.loan_application_history_id = loan.id
left join repayment_record as record on record.repayment_plan_id = plan.id
left join loan_account_ext as lae on lae.loan_id = loan.id
left join apply_quota_record as aqr on aqr.order_no = lae.order_no
where
loan.user_id in {}
and loan.progress in (15,16,65)
and fest.transaction_status in (2,5)
and loan.business_type in (0,2,4,6,9)
order by loan.id,plan.id
"""
    loan_detail_query = loan_detail_template.format(user_id)
    # con_tuomin_xyqb is the module-level connection object defined outside this block.
    return pd.read_sql(loan_detail_query, con_tuomin_xyqb)
\ No newline at end of file
...@@ -44,17 +44,22 @@ def get_Loan_rollBack(df_loan,apply_time,passdueDay_Negative=False): ...@@ -44,17 +44,22 @@ def get_Loan_rollBack(df_loan,apply_time,passdueDay_Negative=False):
df_loan['repaid_at'] = df_loan['repaid_at'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S')) df_loan['repaid_at'] = df_loan['repaid_at'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S'))
df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00')) df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00'))
df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00')) df_loan['deadline'] = df_loan['deadline'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d 00:00:00'))
df_loan['apply_time'] = df_loan['apply_time'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d 00:00:00'),'%Y-%m-%d %H:%M:%S')) df_loan['apply_time'] = df_loan['apply_time'].apply(lambda x : datetime.datetime.strptime(datetime.datetime.strftime(x,'%Y-%m-%d %H:%M:%S'),'%Y-%m-%d %H:%M:%S'))
df_loan['passdue_day'] = 0 df_loan['passdue_day'] = 0
df_loan = df_loan.loc[(df_loan['apply_time'] <= df_loan['observationTime'])] df_loan = df_loan.loc[(df_loan['apply_time'] <= df_loan['observationTime'])]
if df_loan.empty == False: if df_loan.empty == False:
df_loan.loc[(df_loan['repaid_at'] != '2000-01-01')&(df_loan['repaid_at']>df_loan['observationTime']),'repaid_at'] = datetime.datetime.strptime('2000-01-01 00:00:00','%Y-%m-%d 00:00:00') df_loan.loc[(df_loan['repaid_at'] != '2000-01-01')&(df_loan['repaid_at']>df_loan['observationTime']),'repaid_at'] = datetime.datetime.strptime('2000-01-01 00:00:00','%Y-%m-%d 00:00:00')
# 还款时间正常 ,还款状态!=3 # 还款时间正常 ,还款状态!=3
df_loan.loc[(df_loan['repaid_at'] != '2000-01-01') & (df_loan['repayment_status'] !=3 ),'repaid_at'] = datetime.datetime.strptime('2000-01-01 00:00:00','%Y-%m-%d 00:00:00') df_loan.loc[(df_loan['repaid_at'] != '2000-01-01') & (df_loan['repayment_status'] !=3 ),'repaid_at'] = datetime.datetime.strptime('2000-01-01 00:00:00','%Y-%m-%d 00:00:00')
df_loan.loc[(df_loan['repaid_at'] == '2000-01-01'),'repayment_status'] = 0 # df_loan.loc[(df_loan['repaid_at'] == '2000-01-01'),'repayment_status'] = 0
df_loan.loc[(df_loan['repaid_at'] != '2000-01-01'),'passdue_day'] = (df_loan['repaid_at'] - df_loan['deadline']).dt.days df_loan.loc[(df_loan['repaid_at'] == '2000-01-01')& (df_loan['repayment_status'] != 3),'repayment_status'] = 0
df_loan.loc[(df_loan['repaid_at'] == '2000-01-01')&(df_loan['observationTime'] > df_loan['deadline']) ,'passdue_day'] = (df_loan['observationTime'] - df_loan['deadline']).dt.days
df_loan.loc[(df_loan['repayment_status'] == 3),'passdue_day'] = (df_loan['repaid_at'] - df_loan['deadline']).dt.days
df_loan.loc[(df_loan['repayment_status'] != 3)&(df_loan['observationTime'] > df_loan['deadline']) ,'passdue_day'] = (df_loan['observationTime'] - df_loan['deadline']).dt.days
df_loan.loc[(df_loan['repaid_at'] == '2000-01-01')&(df_loan['repayment_status']==0)&(df_loan['observationTime']<df_loan['deadline']),'passdue_day'] = (df_loan['observationTime'] - df_loan['deadline']).dt.days df_loan.loc[(df_loan['repaid_at'] == '2000-01-01')&(df_loan['repayment_status']==0)&(df_loan['observationTime']<df_loan['deadline']),'passdue_day'] = (df_loan['observationTime'] - df_loan['deadline']).dt.days
# df_loan.loc[(df_loan['repaid_at'] != '2000-01-01'),'passdue_day'] = (df_loan['repaid_at'] - df_loan['deadline']).dt.days
# df_loan.loc[(df_loan['repaid_at'] == '2000-01-01')&(df_loan['observationTime'] > df_loan['deadline']) ,'passdue_day'] = (df_loan['observationTime'] - df_loan['deadline']).dt.days
# df_loan.loc[(df_loan['repaid_at'] == '2000-01-01')&(df_loan['repayment_status']==0)&(df_loan['observationTime']<df_loan['deadline']),'passdue_day'] = (df_loan['observationTime'] - df_loan['deadline']).dt.days
if passdueDay_Negative == False: if passdueDay_Negative == False:
df_loan.loc[(df_loan['passdue_day'] < 0),'passdue_day'] = 0 df_loan.loc[(df_loan['passdue_day'] < 0),'passdue_day'] = 0
......
...@@ -249,6 +249,7 @@ def getSql_loanAll(user_id): ...@@ -249,6 +249,7 @@ def getSql_loanAll(user_id):
loan.user_id in {} loan.user_id in {}
and loan.progress in (15,16,65) and loan.progress in (15,16,65)
and fest.transaction_status in (2,5) and fest.transaction_status in (2,5)
and loan.business_type in (0,2kl)
order by loan.id,plan.id order by loan.id,plan.id
""".format(user_id) """.format(user_id)
_df = pd.read_sql(_sql,con_tuomin_xyqb) _df = pd.read_sql(_sql,con_tuomin_xyqb)
......
...@@ -43,23 +43,23 @@ def apps(): ...@@ -43,23 +43,23 @@ def apps():
if __name__ == "__main__": if __name__ == "__main__":
app = apps() # app = apps()
server = HTTPServer(app) # server = HTTPServer(app)
[i.setFormatter(LogFormatter()) for i in logging.getLogger().handlers] # [i.setFormatter(LogFormatter()) for i in logging.getLogger().handlers]
tornado.options.parse_command_line() # tornado.options.parse_command_line()
#
# #== 本地调试 # # #== 本地调试
app.listen(23011) # app.listen(23011)
IOLoop.instance().start() # IOLoop.instance().start()
# from tornado.options import define, options from tornado.options import define, options
# define("port", default=23010, help="run on the given port ", type=int) define("port", default=23010, help="run on the given port ", type=int)
# define("log_path", default='/tmp', help="log path ", type=str) define("log_path", default='/tmp', help="log path ", type=str)
# tornado.options.parse_command_line() tornado.options.parse_command_line()
# app = apps() app = apps()
# http_server = tornado.httpserver.HTTPServer(app) http_server = tornado.httpserver.HTTPServer(app)
# http_server.bind(options.port) http_server.bind(options.port)
# http_server.start(num_processes=0) http_server.start(num_processes=0)
# tornado.ioloop.IOLoop.instance().start() tornado.ioloop.IOLoop.instance().start()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment