1 Star 2 Fork 0

王远/zxgk

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
shixin_handler.py 27.54 KB
一键复制 编辑 原始数据 按行查看 历史
wangyuan 提交于 2019-03-13 16:27 . requests
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
import requests
import re
import os
import random
from lxml import etree
from aip import AipOcr
import time
from config import APP_ID, API_KEY, SECRET_KEY, HEADERS
# One shared HTTP session, used for the captcha download and the search POSTs
# so that cookies set by the site persist between those requests.
session = requests.Session()
# OCR client built from the credentials in config; used to read captcha images.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
class ZxInfo:
    """Scraper for the combined enforcement-information search on zxgk.court.gov.cn.

    Workflow: obtain a captcha id (``get_captche_id``), solve the captcha image
    through the OCR client (``recognize_image``), page through the search
    results (``zhixing_person_list`` / ``zhixing_company_list``) and fetch the
    detail page of every listed case (``zhixing_person_detail`` /
    ``zhixing_company_detail``).

    Relies on the module-level ``session``, ``client`` and ``HEADERS`` objects.
    """

    # Marker text the site puts in a response when the captcha answer is wrong.
    _CAPTCHA_ERROR = "验证码错误"
    # Placeholder answer returned when OCR fails; presumably rejected by the
    # server, which makes the callers' retry loops fetch a fresh captcha.
    _FALLBACK_CAPTCHA = '1111'

    # Field tables: (output key, XPath of the cell text). Tuple order is the
    # key order of the produced dicts.
    _BZXR_FIELDS = (
        ("name", '//td[@id="pnameDetail"]/text()'),
        ("card_id", '//td[@id="partyCardNumDetail"]/text()'),
        # NOTE(review): id "Detail" looks truncated (a prefix may be missing);
        # kept unchanged until the real cell id is confirmed.
        ("sexy", '//td[@id="Detail"]/text()'),
        ("court", '//td[@id="execCourtNameDetail"]/text()'),
        ("case_time", '//td[@id="caseCreateTimeDetail"]/text()'),
        ("case_code", '//td[@id="caseCodeDetail"]/text()'),
        ("target", '//td[@id="execMoneyDetail"]/text()'),
    )
    _ZB_PERSON_FIELDS = (
        ("case_code", '//td[@id="ahDetail"]/text()'),
        ("name", '//td[@id="xmDetail"]/text()'),
        # NOTE(review): reads the same cell as "name" (xmDetail) -- looks like
        # a copy-paste slip; kept unchanged until the real cell id is confirmed.
        ("sexy", '//td[@id="xmDetail"]/text()'),
        ("card_id", '//td[@id="sfzhmDetail"]/text()'),
        ("court", '//td[@id="zxfymcDetail"]/text()'),
        ("case_time", '//td[@id="larqDetail"]/text()'),
        ("final_date", '//td[@id="jarqDetail"]/text()'),
        ("target", '//td[@id="sqzxbdjeDetail"]/text()'),
        ("amount", '//td[@id="swzxbdjeDetail"]/text()'),
    )
    _ZB_COMPANY_FIELDS = (
        ("case_code", '//td[@id="ahDetail"]/text()'),
        ("name", '//td[@id="xmDetail"]/text()'),
        ("card_id", '//td[@id="sfzhmDetail"]/text()'),
        ("court", '//td[@id="zxfymcDetail"]/text()'),
        ("case_time", '//td[@id="larqDetail"]/text()'),
        ("final_date", '//td[@id="jarqDetail"]/text()'),
        ("target", '//td[@id="sqzxbdjeDetail"]/text()'),
        ("amount", '//td[@id="swzxbdjeDetail"]/text()'),
    )
    _XGL_FIELDS = (
        ("name", '//td[@id="inameDetail"]/text()'),
        ("sexy", '//td[@id="sexDetail"]/text()'),
        ("card_id", '//td[@id="cardNumDetail"]/text()'),
        ("court", '//td[@id="courtNameDetail"]/text()'),
        ("area", '//td[@id="areaNameDetail"]/text()'),
        ("case_code", '//td[@id="caseCodeDetail"]/text()'),
        ("case_time", '//td[@id="regDateDetail"]/text()'),
    )
    _SX_PERSON_FIELDS = (
        ("name", '//td[@id="inameDetail"]/text()'),
        ("sexy", '//td[@id="sexDetail"]/text()'),
        ("card_id", '//td[@id="cardNumDetail"]/text()'),
        ("court", '//td[@id="courtNameDetail"]/text()'),
        ("area", '//td[@id="areaNameDetail"]/text()'),
        ("gist_id", '//td[@id="gistIdDetail"]/text()'),
        ("case_time", '//td[@id="regDateDetail"]/text()'),
        ("case_code", '//td[@id="caseCodeDetail"]/text()'),
        ("gist_unit", '//td[@id="gistUnitDetail"]/text()'),
        ("duty", '//td[@id="dutyDetail"]/text()'),
        ("performance", '//td[@id="performanceDetail"]/text()'),
        ("disruptTypeName", '//td[@id="disruptTypeNameDetail"]/text()'),
        ("publish_date", '//td[@id="publishDateDetail"]/text()'),
    )
    _SX_COMPANY_FIELDS = (
        ("name", '//td[@id="inameDetail"]/text()'),
        ("businessEntityName", '//td[@id="businessEntityDetail"]/text()'),
        ("card_id", '//td[@id="cardNumDetail"]/text()'),
        ("court", '//td[@id="courtNameDetail"]/text()'),
        ("area", '//td[@id="areaNameDetail"]/text()'),
        ("gist_id", '//td[@id="gistIdDetail"]/text()'),
        ("case_time", '//td[@id="regDateDetail"]/text()'),
        ("case_code", '//td[@id="caseCodeDetail"]/text()'),
        ("gist_unit", '//td[@id="gistUnitDetail"]/text()'),
        ("duty", '//td[@id="dutyDetail"]/text()'),
        ("performance", '//td[@id="performanceDetail"]/text()'),
        ("disruptTypeName", '//td[@id="disruptTypeNameDetail"]/text()'),
        ("publish_date", '//td[@id="publishDateDetail"]/text()'),
        # Second cell of the row following the publish-date row.
        ("businessEntity",
         '//td[@id="publishDateDetail"]/../following-sibling::tr/td[2]/text()'),
    )

    # Section tables: (output key, label printed when present, XPath whose
    # non-empty result means the section exists on the page, field table).
    # Labels: 被执行人 = person subject to enforcement, 终本案件 = terminated
    # enforcement case, 限制消费人员 = consumption-restricted person,
    # 失信被执行人 = dishonest judgment debtor.
    _PERSON_SECTIONS = (
        ("bzxr", "被执行人", '//table[@id="bzxr"]/tr', _BZXR_FIELDS),
        ("zb", "终本案件", '//table[@id="zb"]/tr', _ZB_PERSON_FIELDS),
        ("xgl", "限制消费人员", '//table[@id="xgl"]/tr', _XGL_FIELDS),
        ("sx", "失信被执行人", '//table[@id="sx"]/tr', _SX_PERSON_FIELDS),
    )
    _COMPANY_SECTIONS = (
        ("bzxr", "被执行人", '//div[text()="被执行人"]', _BZXR_FIELDS),
        ("zb", "终本案件", '//div[text()="终本案件"]', _ZB_COMPANY_FIELDS),
        ("xgl", "限制消费人员", '//div[text()="限制消费人员"]', _XGL_FIELDS),
        ("sx", "失信被执行人", '//div[text()="失信被执行人"]', _SX_COMPANY_FIELDS),
    )

    @staticmethod
    def get_captche_id():
        """Fetch the search form page and return the captcha id embedded in it.

        Returns:
            The string assigned to ``var captchaId`` in the page's JavaScript,
            or ``None`` when the page does not contain it.
        """
        url = "http://zxgk.court.gov.cn/zhzxgk/index_form.do"
        response = requests.request("GET", url, headers=HEADERS)
        result = re.search(r'var captchaId = \'(.*)\';', response.text)
        print(result)
        if not result:
            # Previously fell through to an unbound local; report "not found".
            return None
        captchaid = result.group(1)
        print(captchaid)
        return captchaid

    @staticmethod
    def recognize_image(captchaid):
        """Download the captcha image for *captchaid* and OCR it.

        The image bytes are handed to the OCR client directly; nothing is
        written to disk.

        Args:
            captchaid: Captcha id as returned by ``get_captche_id``.

        Returns:
            A dict ``{'j_captcha': <text>, 'captchaId': captchaid}``. When the
            download or the recognition fails, ``j_captcha`` is the placeholder
            ``'1111'`` so callers always receive a dict.
        """
        url = "http://zxgk.court.gov.cn/zhzxgk/captcha.do"
        # A random number is sent along with the captcha id on every request.
        querystring = {"captchaId": captchaid, "random": random.uniform(0, 1)}
        fallback = {'j_captcha': ZxInfo._FALLBACK_CAPTCHA, 'captchaId': captchaid}

        try:
            response = session.request("GET", url, headers=HEADERS, timeout=6,
                                       params=querystring)
        except requests.RequestException as ee:
            print(ee)
            return fallback

        image = response.content
        if not image:
            print("retry, response.text is empty")
            return fallback

        api_result = client.basicGeneral(image)
        print(api_result)
        try:
            code = api_result['words_result'][0]['words']
        except (KeyError, IndexError, TypeError) as e:
            # No recognised text (or an error payload) in the OCR reply.
            print(e)
            return fallback
        if not code:
            return fallback
        print(code)
        return {'j_captcha': code, 'captchaId': captchaid}

    @staticmethod
    def _first_text(html, xpath):
        """Return the first result of *xpath* on *html*, or ``''`` if none."""
        matches = html.xpath(xpath)
        return matches[0] if matches else ''

    @classmethod
    def _extract_fields(cls, html, fields):
        """Build ``{key: first matching text or ''}`` for a field table."""
        return {key: cls._first_text(html, xpath) for key, xpath in fields}

    @classmethod
    def _collect_sections(cls, html, sections):
        """Extract every section of *sections* that is present in *html*.

        Args:
            html: Parsed document exposing ``xpath()``, or ``None``.
            sections: One of the ``_*_SECTIONS`` tables.

        Returns:
            A list of single-key dicts ``{section_key: {field: text}}`` in
            table order; empty when *html* is ``None`` or no section matches.
        """
        if html is None:
            return []
        info = []
        for key, label, marker, fields in sections:
            if not html.xpath(marker):
                continue
            print(label)
            info.append({key: cls._extract_fields(html, fields)})
        return info

    def _request_detail(self, url, pname, cardnum, j_captcha, casecode, captchaid):
        """GET a detail page, re-solving the captcha until it is accepted.

        The query values are passed through ``params`` so that names or case
        numbers containing characters such as ``&`` or ``#`` are escaped
        instead of corrupting the query string.

        Returns:
            The first response that does not contain the captcha-error marker.
        """
        params = {
            'pnameNewDel': pname,
            'cardNumNewDel': cardnum,
            'j_captchaNewDel': j_captcha,
            'caseCodeNewDel': casecode,
            'captchaIdNewDel': captchaid,
        }
        response = requests.request("GET", url, headers=HEADERS, params=params)
        print(response.url)
        # Each retry sends a new request with a freshly recognised captcha;
        # looping on an unchanged response would never terminate.
        while self._CAPTCHA_ERROR in response.text:
            print("验证码错误,正在重试")
            result = self.recognize_image(captchaid)
            params['j_captchaNewDel'] = result.get('j_captcha')
            response = requests.request("GET", url, headers=HEADERS, params=params)
        return response

    def _search_list(self, pname, cardnum, captchaid, current_page, fetch_detail, delay):
        """Run the search, walk all result pages and fetch each case's detail.

        Args:
            pname: Name to search for.
            cardnum: Identity / organisation number to search for.
            captchaid: Captcha id as returned by ``get_captche_id``.
            current_page: Page number sent with the first request.
            fetch_detail: Bound detail method called for every result row.
            delay: Seconds to sleep after each detail request.

        Returns:
            A list with one ``fetch_detail`` result per result row; empty when
            the response carries no ``1/<pages>`` marker.
        """
        url = "http://zxgk.court.gov.cn/zhzxgk/newsearch"
        result = self.recognize_image(captchaid)
        payload = {
            'currentPage': current_page,
            'searchCourtName': '全国法院(包含地方各级法院)',
            'selectCourtId': '0',
            'selectCourtArrange': '1',
            'pname': pname,
            'cardNum': cardnum,
            'j_captcha': result.get('j_captcha'),
            'countNameSelect': '',
            'captchaId': result.get('captchaId')
        }
        response = session.request("POST", url, data=payload, headers=HEADERS)
        # NOTE(review): retries are unbounded; consider a cap if OCR keeps failing.
        while self._CAPTCHA_ERROR in response.text:
            time.sleep(1)
            result = self.recognize_image(captchaid)
            payload['j_captcha'] = result.get('j_captcha')
            response = session.request("POST", url, data=payload, headers=HEADERS)

        # The page count is read from a "1/<pages>" marker in the first page.
        page_marker = re.search('1/\\d{1,4}', response.text)
        if page_marker is None:
            print("未找到分页信息,返回空结果")
            return []
        max_page = int(page_marker.group().replace('1/', ''))
        print("共{}页数据".format(max_page))

        all_info = []
        for page in range(1, max_page + 1):
            print("*" * 100)
            print("正在爬取关键词{}第{}页数".format(cardnum, page))
            print("*" * 100)
            payload['currentPage'] = page
            response = session.request("POST", url, data=payload, headers=HEADERS)
            while self._CAPTCHA_ERROR in response.text:
                result = self.recognize_image(captchaid)
                payload['j_captcha'] = result.get('j_captcha')
                response = session.request("POST", url, data=payload, headers=HEADERS)

            html = etree.HTML(response.text)
            if html is None:
                continue
            trs = html.xpath('//table/tbody/tr')
            # The first row is skipped (presumably the table header).
            for tr in trs[1:]:
                tds = tr.xpath('.//td/text()')
                print(tds)
                if len(tds) < 4:
                    # Row lacks the name (index 1) / case number (index 3) cells.
                    continue
                name = tds[1]
                case_no = tds[3]
                print(name, result.get('j_captcha'), case_no, captchaid)
                info = fetch_detail(name, cardnum, result.get('j_captcha'), case_no, captchaid)
                all_info.append(info)
                time.sleep(delay)
        return all_info

    def zhixing_person_list(self, pname, cardnum, captchaid, current_page=1):
        """Search for a natural person and return the details of every hit.

        Args:
            pname: Person name.
            cardnum: Identity number.
            captchaid: Captcha id as returned by ``get_captche_id``.
            current_page: Page number sent with the first request.

        Returns:
            A list of ``zhixing_person_detail`` results, one per result row.
        """
        return self._search_list(pname, cardnum, captchaid, current_page,
                                 self.zhixing_person_detail, 1)

    def zhixing_person_detail(self, pname, cardnum, j_captcha_newdel,
                              casecode_newdel, captchaid_newdel):
        """Fetch and parse the detail page of one case for a natural person.

        Returns:
            ``{casecode_newdel: [section dicts]}`` where each section dict has
            one of the keys ``bzxr``, ``zb``, ``xgl`` or ``sx``; cells missing
            from the page are reported as ``''``.
        """
        response = self._request_detail(
            "http://zxgk.court.gov.cn/zhzxgk/newdetail",
            pname, cardnum, j_captcha_newdel, casecode_newdel, captchaid_newdel)
        html = etree.HTML(response.text.encode('utf-8', 'ignore'))
        return {casecode_newdel: self._collect_sections(html, self._PERSON_SECTIONS)}

    def zhixing_company_list(self, cardnum, pname, captchaid, current_page=1):
        """Search for a company and return the details of every hit.

        Note the parameter order: *cardnum* comes before *pname* here.

        Args:
            cardnum: Organisation number.
            pname: Company name.
            captchaid: Captcha id as returned by ``get_captche_id``.
            current_page: Page number sent with the first request.

        Returns:
            A list of ``zhixing_company_detail`` results, one per result row.
        """
        return self._search_list(pname, cardnum, captchaid, current_page,
                                 self.zhixing_company_detail, 0.5)

    def zhixing_company_detail(self, pname, cardnum, j_captcha_newdel,
                               casecode_newdel, captchaid_newdel):
        """Fetch and parse the detail page of one case for a company.

        Returns:
            ``{casecode_newdel: [section dicts]}`` where each section dict has
            one of the keys ``bzxr``, ``zb``, ``xgl`` or ``sx``; cells missing
            from the page are reported as ``''``.
        """
        response = self._request_detail(
            "http://zxgk.court.gov.cn/zhzxgk/detailZhcx.do",
            pname, cardnum, j_captcha_newdel, casecode_newdel, captchaid_newdel)
        html = etree.HTML(response.text)
        return {casecode_newdel: self._collect_sections(html, self._COMPANY_SECTIONS)}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/wy031530/zxgk.git
git@gitee.com:wy031530/zxgk.git
wy031530
zxgk
zxgk
master

搜索帮助