Commit 3bd346ff authored by 张鹏程

设置下载json 下载

parent c702a1bc
...@@ -7,4 +7,5 @@ FEATURE_HOST_MY = 'http://localhost:23010' ...@@ -7,4 +7,5 @@ FEATURE_HOST_MY = 'http://localhost:23010'
url_reportanalysis = '/report' url_reportanalysis = '/report'
TO_JSON = FILE_PATH+'/to_json'
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
import threading import threading
from utils import JsonUtil from utils import JsonUtil
from handler.Base_Handler import BaseHandler from handler.Base_Handler import BaseHandler
import json
from config import settings
from service.PersonalInformation_Service import PersonalInformation # 个人基本信息 from service.PersonalInformation_Service import PersonalInformation # 个人基本信息
from service.InformationSummary_Service import InformationSummary # 信息概要 from service.InformationSummary_Service import InformationSummary # 信息概要
from service.TransactionDetails_Service import TransactionDetails # 信贷交易明细 from service.TransactionDetails_Service import TransactionDetails # 信贷交易明细
...@@ -14,7 +16,7 @@ from service.InitHtml_Service import InitHtml # 初始化HTML ...@@ -14,7 +16,7 @@ from service.InitHtml_Service import InitHtml # 初始化HTML
class ReportAnalysis(BaseHandler): class ReportAnalysis(BaseHandler):
def post(self): def post(self):
self._filepath = self.get_argument('filepath', default=None) self._filepath = self.get_argument('filepath', default=None)
self._isdownload = self.get_argument('isdownload',default=None)
if self._filepath == None or self._filepath == '': if self._filepath == None or self._filepath == '':
self.write(JsonUtil.build_json(code = JsonUtil.Constants.Code_Params_Error, self.write(JsonUtil.build_json(code = JsonUtil.Constants.Code_Params_Error,
mssage=JsonUtil.Constants.Msg_Params_Error.format('filepath',self._filepath))) mssage=JsonUtil.Constants.Msg_Params_Error.format('filepath',self._filepath)))
...@@ -54,6 +56,13 @@ class ReportAnalysis(BaseHandler): ...@@ -54,6 +56,13 @@ class ReportAnalysis(BaseHandler):
# QueryInfomation(html.menu_dict) # QueryInfomation(html.menu_dict)
result = Result.get_result() result = Result.get_result()
Result.clear_result() Result.clear_result()
# '/Users/zhangpengcheng/Documents/量化派代码管理/credit-report-api/html/data.json'
if self._isdownload:
with open(settings.TO_JSON+'/'+self._filepath.split('/')[-1]+'.json', 'w',encoding='utf8') as file_obj:
file_obj.write(json.dumps(result,ensure_ascii=False))
file_obj.close()
self.write(JsonUtil.build_json(report = result, self.write(JsonUtil.build_json(report = result,
code=JsonUtil.Constants.Code_Success, code=JsonUtil.Constants.Code_Success,
mssage=JsonUtil.Constants.Msg_Success)) mssage=JsonUtil.Constants.Msg_Success))
......
...@@ -17,7 +17,7 @@ from tornado.options import define, options ...@@ -17,7 +17,7 @@ from tornado.options import define, options
from handler import ReportAnalysis_Handler from handler import ReportAnalysis_Handler
from tornado.options import define, options from tornado.options import define, options
define("port", default=23010, help="run on the given port ", type=int) define("port", default=20010, help="run on the given port ", type=int)
define("log_path", default='/tmp', help="log path ", type=str) define("log_path", default='/tmp', help="log path ", type=str)
class LogFormatter(tornado.log.LogFormatter): class LogFormatter(tornado.log.LogFormatter):
...@@ -36,19 +36,19 @@ def apps(): ...@@ -36,19 +36,19 @@ def apps():
if __name__ == "__main__": if __name__ == "__main__":
# app = apps() app = apps()
# server = HTTPServer(app) server = HTTPServer(app)
# [i.setFormatter(LogFormatter()) for i in logging.getLogger().handlers] [i.setFormatter(LogFormatter()) for i in logging.getLogger().handlers]
# tornado.options.parse_command_line() tornado.options.parse_command_line()
#
# # #== 本地调试
# app.listen(23011)
# IOLoop.instance().start()
# #== 本地调试
app.listen(20011)
IOLoop.instance().start()
tornado.options.parse_command_line()
app = apps() # tornado.options.parse_command_line()
http_server = tornado.httpserver.HTTPServer(app) # app = apps()
http_server.bind(options.port) # http_server = tornado.httpserver.HTTPServer(app)
http_server.start() # http_server.bind(options.port)
tornado.ioloop.IOLoop.instance().start() # http_server.start()
# tornado.ioloop.IOLoop.instance().start()
# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup,Comment
from abc import ABCMeta,abstractmethod
import re
from lxml import etree
# path = "/Users/zhangpengcheng/Documents/量化派代码管理/credit_report/html/一代征信报告/432325197803211379.html"
# path = "/Users/zhangpengcheng/Documents/量化派代码管理/credit_report/html/一代征信报告/32052219780226051X.htm"
# Module-level sample document: read the report HTML once and parse it.
# The handle is opened in binary mode so BeautifulSoup receives raw bytes;
# the context manager guarantees the file is closed after reading.
path = "/Users/zhangpengcheng/Documents/量化派代码管理/credit_report/html/一代征信报告/test.html"
with open(path, 'rb') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle,'lxml')
class Base(object):
    """Load a report HTML file and expose the top-level nodes of its ``<body>``.

    After construction ``self.children`` holds every direct child of
    ``<body>`` except bare ``'\\n'`` text nodes, with HTML comments removed
    from the tree beforehand.

    NOTE: ``get_json`` is decorated with ``@abstractmethod`` but the class does
    not use ``ABCMeta`` as its metaclass, so the decorator is not enforced and
    subclasses that do not override ``get_json`` inherit the pass-through
    implementation below.
    """

    # Report parsed when the caller does not supply a path.
    DEFAULT_PATH = "/Users/zhangpengcheng/Documents/量化派代码管理/credit_report/html/一代征信报告/test.html"

    def __init__(self, path=None):
        """Parse the HTML document at ``path`` (``DEFAULT_PATH`` when omitted).

        Args:
            path: Filesystem path of the HTML report to parse, or ``None`` to
                use ``DEFAULT_PATH``.
        """
        self.path = self.DEFAULT_PATH if path is None else path
        # Read from self.path (not a module global) and close the handle
        # as soon as the bytes are in memory.
        with open(self.path, 'rb') as htmlfile:
            htmlhandle = htmlfile.read()
        soup = BeautifulSoup(htmlhandle, 'lxml')
        # Strip HTML comments so they do not show up among the body children.
        for comment in soup.find_all(text=lambda text: isinstance(text, Comment)):
            comment.extract()
        # [s.extract() for s in soup("style")]  # (disabled) remove specific tags
        self.children = self.get_children(soup)

    def get_children(self, soup):
        """Return the direct children of ``soup.body``, skipping ``'\\n'`` nodes."""
        return [child for child in soup.body.children if child != '\n']

    @abstractmethod
    def get_json(self, detail, msgJson):
        """Default extraction hook: return ``msgJson`` unchanged.

        Args:
            detail: Nodes a subclass would extract values from (unused here).
            msgJson: Dict to be filled in by subclasses.

        Returns:
            The same ``msgJson`` object that was passed in.
        """
        return msgJson
class ReportDetail(Base):
    """Report details: report number, query request time and report time.

    The values are read from the ``<b>`` elements of the second top-level
    body node and stored in ``self.result`` under the key ``'报告详情'``.
    """

    def __init__(self):
        Base.__init__(self)
        detail = self.children[1].find_all('b')
        msgJson = {'报告编号':None,'查询请求时间':None,'报告时间':None}
        self.result = {'报告详情':self.get_json(detail,msgJson)}

    def get_json(self,detail,msgJson):
        """Fill ``msgJson`` with the text that follows each ``'<key>:'`` label.

        Args:
            detail: Iterable of nodes exposing ``get_text()``.
            msgJson: Dict whose keys are the labels to look for; values are
                overwritten in place when a label is found.

        Returns:
            The same ``msgJson`` dict, updated in place.
        """
        for hl in detail:
            text = hl.get_text()
            for k in msgJson:
                pieces = text.split(k+':')
                # Only assign when the "<key>:" separator is really present;
                # a key that appears without the colon used to raise
                # IndexError on the [1] lookup.
                if len(pieces) > 1:
                    msgJson[k] = pieces[1]
        return msgJson
class SelectDetail(Base):
    """Query information section of the report.

    Collects the ``<tr>`` rows of the table inside the third top-level body
    node and stores the outcome of ``get_json`` in ``self.result`` under the
    key ``'查询信息'``.
    """

    def __init__(self):
        Base.__init__(self)
        rows = self.children[2].table.find_all('tr')
        # get_json is not overridden here, so the inherited implementation
        # receives a fresh empty dict as the accumulator.
        self.result = {'查询信息': self.get_json(rows, {})}
# r = ReportDetail()
# s = SelectDetail()
# NOTE(review): AllDatail is not defined anywhere in the visible source;
# unless it is declared elsewhere this line raises NameError. Confirm the
# intended class name.
a = AllDatail()

# Top-level nodes of <body>, skipping bare newline text nodes.
children = [child for child in soup.body.children if child != '\n']

d_identity = {}
table = soup.table
tbody = table.find_all('tbody')
tr_arr = table.find_all("tr")
# One list of <td> cells per table row.
tds = [tr.find_all('td') for tr in tr_arr]

name = []
value = []
# Return value is discarded; the call is kept unchanged from the original.
tds[0][0].div.get_text()
# Walk the rows of the nested table once (the row list is computed a single
# time instead of on every iteration). Rows without a
# "WORD-BREAK: break-all" styled element contribute labels, the others
# contribute values.
for row in tds[1][0].find_all('tr'):
    texts = [cell.get_text() for cell in row.find_all('div')]
    if row.find(style="WORD-BREAK: break-all") is None:
        name.extend(texts)
    else:
        value.extend(texts)
# Pair labels with values positionally.
d_identity = dict(zip(name,value))
# for child in children:
#
# # print(type(child))
# tr_arr = child.find_all("tr")
# for tr in tr_arr:
# print(tr)
#
# # print(child.find_all(text="性别"))
# print('----------')
# # print('--------')
# from lxml import etree
# html=etree.HTML(htmlhandle,etree.HTMLParser())
# print(html.text())
# print(html.xpath("//b[contains(text(),'一 个人基本信息')]"))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment