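# NOTE: web-page residue captured with the source (not part of the script):
# "Code pull complete; the page will refresh automatically."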
import requests
import lxml
import time
from lxml import etree
# Ids of collected positions whose status text marked them as offline;
# appended to by page().
j_li = []
# NOTE(review): this session is created but not used by the code visible in
# this file; the requests are issued through the module-level requests API.
session = requests.session()
# Collections list page; page() fetches it with a ``pageNo`` query parameter.
url = 'https://www.lagou.com/mycenter/collections.html'
# Endpoint that page() POSTs ``positionId``/``type`` to.  The spelling
# "collectPositoin" is kept as-is; presumably it mirrors the site — verify.
cal_url = 'https://www.lagou.com/mycenter/collectPositoin.json'
# Request headers sent with every GET and POST in page().
# 'Content-Length' is intentionally absent: requests derives it from the
# actual body, whereas a fixed value of '25' was also sent on the body-less
# GET and did not match POST bodies of other lengths.
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    # NOTE(review): hard-coded login cookie; it will expire and should not
    # live in source control — consider loading it from the environment.
    'Cookie': '_ga=GA1.2.140279281.1565597374; user_trace_token=20190812160936-86a0449c-bcd8-11e9-894e-525400f775ce; LGUID=20190812160936-86a047d4-bcd8-11e9-894e-525400f775ce; index_location_city=%E4%B8%8A%E6%B5%B7; LG_HAS_LOGIN=1; gray=resume; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%225388642%22%2C%22%24device_id%22%3A%2216d235e1084e0-0c7d99955cc3aa-34564b7b-2073600-16d235e10852af%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24os%22%3A%22Windows%22%2C%22%24browser%22%3A%22Chrome%22%2C%22%24browser_version%22%3A%2270.0.3538.25%22%7D%2C%22first_id%22%3A%2216d235e1084e0-0c7d99955cc3aa-34564b7b-2073600-16d235e10852af%22%7D; JSESSIONID=ABAAABAAADEAAFIC7B37551D947AF03BDD5958C28FD73D2; _gid=GA1.2.2078235352.1570533590; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1569724251,1569808329,1569829537,1570533590; LGSID=20191008191955-8e255c5e-e9bd-11e9-9891-525400f775ce; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fpassport.lagou.com%2Flogin%2Flogin.html%3Fsignature%3DF182FCD8D12CC6727CAF55EEAA0F63A7%26service%3Dhttp%25253A%25252F%25252Fwww.lagou.com%25252Fmycenter%25252Fcollections.html%25253FpageNo%25253D20%26action%3Dlogin%26serviceId%3Dlagou%26ts%3D1570533594234; gate_login_token=11a818d60b515c89d0c12ea212c33a54e6a3a3e78b329ed3; LG_LOGIN_USER_ID=3c89068e96647f2a6e9c6cdaa6f2fbf8768e0657c83e973e; _putrc=ED543FD8CF457C4F; login=true; unick=%E5%90%B4%E8%B6%85; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=362; privacyPolicyPopup=false; X_HTTP_TOKEN=c9128fc60817126f606335075123906486a0263e5a; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1570533601; LGRID=20191008192006-94b6fab9-e9bd-11e9-a53a-5254005c3644',
    'Host': 'www.lagou.com',
    'Origin': 'https://www.lagou.com',
    'Referer': 'https://www.lagou.com/mycenter/collections.html?pageNo=1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3732.400 QQBrowser/10.5.3819.400',
    'X-Anit-Forge-Code': '29791447',
    'X-Anit-Forge-Token': 'b67d70c1-c4e0-4440-b890-9d42e95aa2f2',
    'X-Requested-With': 'XMLHttpRequest',
}
def page(p):
    """Scan one page of the collections list and POST each offline position.

    Fetches ``url`` with ``pageNo=p``, writes the raw HTML to ``lagou.html``
    (overwritten on every call), then walks the ``<li>`` entries under the
    ``collectionsForm`` form.  Every entry whose status text contains
    "已下线" (taken offline) has its id appended to the module-level ``j_li``
    and is POSTed to ``cal_url`` with ``type`` 0 — presumably this removes it
    from the collection; confirm against the site.

    Args:
        p: Page number sent as the ``pageNo`` query parameter.
    """
    # A timeout keeps a stalled connection from hanging the whole run.
    r = requests.get(url, params={'pageNo': p}, headers=headers, timeout=30)
    print(r.status_code, r.url)
    html = r.content.decode()
    # Keep a copy of the last fetched page on disk for inspection.
    with open('lagou.html', 'w', encoding='utf8') as f:
        f.write(html)

    html_xp = etree.HTML(html)
    for job in html_xp.xpath('//form[@id="collectionsForm"]/ul/li'):
        ids = job.xpath('./@data-id')
        states = job.xpath('./div/span[last()]/text()')
        if not ids or not states:
            # Entry without an id or a status label: skip it instead of
            # failing with IndexError on the empty XPath result.
            continue
        job_id, state = ids[0], states[0]
        print(job_id, state)
        if '已下线' not in state:
            continue

        j_li.append(job_id)
        data = {
            'positionId': job_id,
            'type': 0,
        }
        qw = requests.post(cal_url, data=data, headers=headers, timeout=30)
        try:
            reply = qw.json()
        except ValueError:
            # Non-JSON reply (e.g. an HTML page): show the raw text instead
            # of aborting the remaining entries.
            reply = qw.text
        print(qw.status_code, qw.url, reply)
        # Pause between POSTs.  NOTE(review): the original nesting of this
        # sleep was lost with the file's indentation; per-POST is assumed.
        time.sleep(1)
# Process collection pages 1 through 20, then show every position id that
# page() recorded in j_li.
for page_no in range(1, 21):
    page(page_no)
print(j_li)
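# NOTE: web-page residue captured with the source (not part of the script):
# "This content may be unsuitable for display and is hidden. You can review and edit it using the relevant editing tools."
# "If you confirm that the content contains no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates applicable laws and regulations, click Submit to appeal and we will process it as soon as possible."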