1 Star 0 Fork 0

钱途/监控爬虫

Create your Gitee Account
Explore and code with more than 12 million developers. Free private repositories! :)
Sign up
文件
Clone or Download
telzhenxuan11.py 2.75 KB
Copy Edit Raw Blame History
钱途 authored 2020-12-02 09:21 . selenium拦截network信息爬虫
# -*- coding:utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions
from browsermobproxy import Server
from selenium.webdriver.common.keys import Keys
server = Server(r'D:\test\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat') #拦截network 信息用
server.start()
proxy = server.create_proxy()
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation']) #防止webdriver被屏蔽
option.add_argument('--proxy-server={0}'.format(proxy.proxy))
option.add_argument('--ignore-certificate-errors') #去除网站不安全提示
driver = Chrome(options=option)
proxy.new_har("douyin", options={'captureHeaders': True, 'captureContent': True})
driver.get('https://fxg.jinritemai.com/login')
time.sleep(60)
# input_str = driver.find_element_by_id('order_id')
# input_str.send_keys('4722606443658190139')
time.sleep(1)
jg = open('臻选抖店数据.txt', 'w', encoding='utf-8')
list1 = []
try:
for num in range(1, 350):
button = driver.find_element_by_xpath('//li//div//input[@type="text"]') #分别对应三个层级的标签
button.send_keys(str(num))
button.send_keys(Keys.ENTER)
time.sleep(10)
result = proxy.har
log = result["log"]
entries = log["entries"]
for entrie in entries:
response = entrie["response"]
response1 = str(response)
response2 = response1.split('post')
for line in response2:
line = line.strip()
if '_tel' in line:
try:
line1 = line.split('_tel":"')[1].split('"')[0]
line1 = line1.strip()
print(line1)
jg.write(line1+'\n')
except Exception as e:
print(e)
# for entry in result['log']['entries']:
# jg.write(str(entry))
except Exception as e:
print(e)
# for entry in result['log']['entries']:
# _url = entry['request']['url']
# # 根据URL找到数据接口
# if "/api/v2/aweme/post" in _url:
# _response = entry['response']
# _content = _response['content']['text']
# # 获取接口返回内容
# print(_content)
server.stop()
#driver.quit()
# jg1 = open('臻选未下载记录.txt', 'w', encoding='utf-8')
# jg = open('臻选抖店数据.txt', 'w', encoding='utf-8')
# f1 = open('臻选订单号.txt', 'r', encoding='utf-8')
#
#
#
jg.close()
# jg1.close()
# f1.close()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/buau654/monitoring_crawlers.git
git@gitee.com:buau654/monitoring_crawlers.git
buau654
monitoring_crawlers
监控爬虫
master

Search