Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
credit-report-api
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
QA
credit-report-api
Commits
3bd346ff
Commit
3bd346ff
authored
Aug 23, 2019
by
张鹏程
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
设置下载json 下载
parent
c702a1bc
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
26 additions
and
145 deletions
+26
-145
settings.py
config/settings.py
+1
-0
ReportAnalysis_Handler.py
handler/ReportAnalysis_Handler.py
+10
-1
requirements.txt
requirements.txt
+0
-1
startup.py
startup.py
+15
-15
analysis.py
testfile/analysis.py
+0
-128
__init__.py
to_json/__init__.py
+0
-0
No files found.
config/settings.py
View file @
3bd346ff
...
@@ -7,4 +7,5 @@ FEATURE_HOST_MY = 'http://localhost:23010'
...
@@ -7,4 +7,5 @@ FEATURE_HOST_MY = 'http://localhost:23010'
url_reportanalysis
=
'/report'
url_reportanalysis
=
'/report'
TO_JSON
=
FILE_PATH
+
'/to_json'
handler/ReportAnalysis_Handler.py
View file @
3bd346ff
...
@@ -3,6 +3,8 @@
...
@@ -3,6 +3,8 @@
import
threading
import
threading
from
utils
import
JsonUtil
from
utils
import
JsonUtil
from
handler.Base_Handler
import
BaseHandler
from
handler.Base_Handler
import
BaseHandler
import
json
from
config
import
settings
from
service.PersonalInformation_Service
import
PersonalInformation
# 个人基本信息
from
service.PersonalInformation_Service
import
PersonalInformation
# 个人基本信息
from
service.InformationSummary_Service
import
InformationSummary
# 信息概要
from
service.InformationSummary_Service
import
InformationSummary
# 信息概要
from
service.TransactionDetails_Service
import
TransactionDetails
# 信贷交易明细
from
service.TransactionDetails_Service
import
TransactionDetails
# 信贷交易明细
...
@@ -14,7 +16,7 @@ from service.InitHtml_Service import InitHtml # 初始化HTML
...
@@ -14,7 +16,7 @@ from service.InitHtml_Service import InitHtml # 初始化HTML
class
ReportAnalysis
(
BaseHandler
):
class
ReportAnalysis
(
BaseHandler
):
def
post
(
self
):
def
post
(
self
):
self
.
_filepath
=
self
.
get_argument
(
'filepath'
,
default
=
None
)
self
.
_filepath
=
self
.
get_argument
(
'filepath'
,
default
=
None
)
self
.
_isdownload
=
self
.
get_argument
(
'isdownload'
,
default
=
None
)
if
self
.
_filepath
==
None
or
self
.
_filepath
==
''
:
if
self
.
_filepath
==
None
or
self
.
_filepath
==
''
:
self
.
write
(
JsonUtil
.
build_json
(
code
=
JsonUtil
.
Constants
.
Code_Params_Error
,
self
.
write
(
JsonUtil
.
build_json
(
code
=
JsonUtil
.
Constants
.
Code_Params_Error
,
mssage
=
JsonUtil
.
Constants
.
Msg_Params_Error
.
format
(
'filepath'
,
self
.
_filepath
)))
mssage
=
JsonUtil
.
Constants
.
Msg_Params_Error
.
format
(
'filepath'
,
self
.
_filepath
)))
...
@@ -54,6 +56,13 @@ class ReportAnalysis(BaseHandler):
...
@@ -54,6 +56,13 @@ class ReportAnalysis(BaseHandler):
# QueryInfomation(html.menu_dict)
# QueryInfomation(html.menu_dict)
result
=
Result
.
get_result
()
result
=
Result
.
get_result
()
Result
.
clear_result
()
Result
.
clear_result
()
# '/Users/zhangpengcheng/Documents/量化派代码管理/credit-report-api/html/data.json'
if
self
.
_isdownload
:
with
open
(
settings
.
TO_JSON
+
'/'
+
self
.
_filepath
.
split
(
'/'
)[
-
1
]
+
'.json'
,
'w'
,
encoding
=
'utf8'
)
as
file_obj
:
file_obj
.
write
(
json
.
dumps
(
result
,
ensure_ascii
=
False
))
file_obj
.
close
()
self
.
write
(
JsonUtil
.
build_json
(
report
=
result
,
self
.
write
(
JsonUtil
.
build_json
(
report
=
result
,
code
=
JsonUtil
.
Constants
.
Code_Success
,
code
=
JsonUtil
.
Constants
.
Code_Success
,
mssage
=
JsonUtil
.
Constants
.
Msg_Success
))
mssage
=
JsonUtil
.
Constants
.
Msg_Success
))
...
...
requirements.txt
View file @
3bd346ff
tornado
==5.0.1
tornado
==5.0.1
logging
redis
redis
PyMysql
==0.8.0
PyMysql
==0.8.0
pandas
==0.22.0
pandas
==0.22.0
...
...
startup.py
View file @
3bd346ff
...
@@ -17,7 +17,7 @@ from tornado.options import define, options
...
@@ -17,7 +17,7 @@ from tornado.options import define, options
from
handler
import
ReportAnalysis_Handler
from
handler
import
ReportAnalysis_Handler
from
tornado.options
import
define
,
options
from
tornado.options
import
define
,
options
define
(
"port"
,
default
=
2
3
010
,
help
=
"run on the given port "
,
type
=
int
)
define
(
"port"
,
default
=
2
0
010
,
help
=
"run on the given port "
,
type
=
int
)
define
(
"log_path"
,
default
=
'/tmp'
,
help
=
"log path "
,
type
=
str
)
define
(
"log_path"
,
default
=
'/tmp'
,
help
=
"log path "
,
type
=
str
)
class
LogFormatter
(
tornado
.
log
.
LogFormatter
):
class
LogFormatter
(
tornado
.
log
.
LogFormatter
):
...
@@ -36,19 +36,19 @@ def apps():
...
@@ -36,19 +36,19 @@ def apps():
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# app = apps()
app
=
apps
()
# server = HTTPServer(app)
server
=
HTTPServer
(
app
)
# [i.setFormatter(LogFormatter()) for i in logging.getLogger().handlers]
[
i
.
setFormatter
(
LogFormatter
())
for
i
in
logging
.
getLogger
()
.
handlers
]
# tornado.options.parse_command_line()
tornado
.
options
.
parse_command_line
()
#
# # #== 本地调试
# app.listen(23011)
# IOLoop.instance().start()
# #== 本地调试
app
.
listen
(
20011
)
IOLoop
.
instance
()
.
start
()
tornado
.
options
.
parse_command_line
()
app
=
apps
()
# tornado.options.parse_command_line()
http_server
=
tornado
.
httpserver
.
HTTPServer
(
app
)
# app = apps()
http_server
.
bind
(
options
.
port
)
# http_server = tornado.httpserver.HTTPServer(app)
http_server
.
start
()
# http_server.bind(options.port)
tornado
.
ioloop
.
IOLoop
.
instance
()
.
start
()
# http_server.start()
# tornado.ioloop.IOLoop.instance().start()
testfile/analysis.py
deleted
100644 → 0
View file @
c702a1bc
# -*- coding:utf-8 -*-
from
bs4
import
BeautifulSoup
,
Comment
from
abc
import
ABCMeta
,
abstractmethod
import
re
from
lxml
import
etree
# path = "/Users/zhangpengcheng/Documents/量化派代码管理/credit_report/html/一代征信报告/432325197803211379.html"
# path = "/Users/zhangpengcheng/Documents/量化派代码管理/credit_report/html/一代征信报告/32052219780226051X.htm"
path
=
"/Users/zhangpengcheng/Documents/量化派代码管理/credit_report/html/一代征信报告/test.html"
htmlfile
=
open
(
path
,
'rb'
)
# print(htmlfile)
htmlhandle
=
htmlfile
.
read
()
soup
=
BeautifulSoup
(
htmlhandle
,
'lxml'
)
class
Base
(
object
):
def
__init__
(
self
):
self
.
path
=
"/Users/zhangpengcheng/Documents/量化派代码管理/credit_report/html/一代征信报告/test.html"
htmlfile
=
open
(
path
,
'rb'
)
htmlhandle
=
htmlfile
.
read
()
soup
=
BeautifulSoup
(
htmlhandle
,
'lxml'
)
comments
=
soup
.
findAll
(
text
=
lambda
text
:
isinstance
(
text
,
Comment
))
[
comment
.
extract
()
for
comment
in
comments
]
# 去除注释
# [s.extract() for s in soup("style")] # 去除指定标签
self
.
children
=
self
.
get_children
(
soup
)
pass
def
get_children
(
self
,
soup
):
children
=
[]
for
child
in
soup
.
body
.
children
:
if
child
!=
'
\n
'
:
children
.
append
(
child
)
return
children
@
abstractmethod
def
get_json
(
self
,
detail
,
msgJson
):
return
msgJson
class
ReportDetail
(
Base
):
"""报告详情"""
def
__init__
(
self
):
Base
.
__init__
(
self
)
detail
=
self
.
children
[
1
]
.
find_all
(
'b'
)
msgJson
=
{
'报告编号'
:
None
,
'查询请求时间'
:
None
,
'报告时间'
:
None
}
self
.
result
=
{
'报告详情'
:
self
.
get_json
(
detail
,
msgJson
)}
pass
def
get_json
(
self
,
detail
,
msgJson
):
for
hl
in
detail
:
text
=
hl
.
get_text
()
for
k
,
v
in
msgJson
.
items
():
if
k
in
text
:
msgJson
[
k
]
=
text
.
split
(
k
+
':'
)[
1
]
return
msgJson
class
SelectDetail
(
Base
):
"""查询信息"""
def
__init__
(
self
):
Base
.
__init__
(
self
)
detail
=
self
.
children
[
2
]
.
table
.
find_all
(
'tr'
)
msgJson
=
{}
self
.
result
=
{
'查询信息'
:
self
.
get_json
(
detail
,
msgJson
)}
pass
# r = ReportDetail()
# s = SelectDetail()
a
=
AllDatail
()
children
=
[]
for
child
in
soup
.
body
.
children
:
if
child
!=
'
\n
'
:
children
.
append
(
child
)
d_identity
=
{}
table
=
soup
.
table
tbody
=
table
.
find_all
(
'tbody'
)
tr_arr
=
table
.
find_all
(
"tr"
)
tds
=
[]
for
tr
in
tr_arr
:
tds
.
append
(
tr
.
find_all
(
'td'
))
# print(tds[0][0].get_text())
#
# print(tds[1][0].table['class'])
#
# print(tds[1][0].tr.contents[3])
name
=
[]
value
=
[]
tds
[
0
][
0
]
.
div
.
get_text
()
for
i
in
range
(
len
(
tds
[
1
][
0
]
.
find_all
(
'tr'
))):
if
tds
[
1
][
0
]
.
find_all
(
'tr'
)[
i
]
.
find
(
style
=
"WORD-BREAK: break-all"
)
==
None
:
span
=
tds
[
1
][
0
]
.
find_all
(
'tr'
)[
i
]
.
find_all
(
'div'
)
for
s
in
span
:
name
.
append
(
s
.
get_text
())
else
:
span
=
tds
[
1
][
0
]
.
find_all
(
'tr'
)[
i
]
.
find_all
(
'div'
)
for
s
in
span
:
value
.
append
(
s
.
get_text
())
d_identity
=
dict
(
zip
(
name
,
value
))
# for child in children:
#
# # print(type(child))
# tr_arr = child.find_all("tr")
# for tr in tr_arr:
# print(tr)
#
# # print(child.find_all(text="性别"))
# print('----------')
# # print('--------')
# from lxml import etree
# html=etree.HTML(htmlhandle,etree.HTMLParser())
# print(html.text())
# print(html.xpath("//b[contains(text(),'一 个人基本信息')]"))
to_json/__init__.py
0 → 100644
View file @
3bd346ff
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment