1 Star 0 Fork 0

钱途/监控爬虫

Create your Gitee Account
Explore and code with more than 12 million developers. Free private repositories! :)
Sign up
文件
Clone or Download
telzhenxuan11.py 2.75 KB
Copy Edit Raw Blame History
钱途 authored 2020-12-02 09:21 . selenium拦截network信息爬虫
# -*- coding:utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions
from browsermobproxy import Server
from selenium.webdriver.common.keys import Keys
server = Server(r'D:\test\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat') #拦截network 信息用
server.start()
proxy = server.create_proxy()
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation']) #防止webdriver被屏蔽
option.add_argument('--proxy-server={0}'.format(proxy.proxy))
option.add_argument('--ignore-certificate-errors') #去除网站不安全提示
driver = Chrome(options=option)
proxy.new_har("douyin", options={'captureHeaders': True, 'captureContent': True})
driver.get('https://fxg.jinritemai.com/login')
time.sleep(60)
# input_str = driver.find_element_by_id('order_id')
# input_str.send_keys('4722606443658190139')
time.sleep(1)
jg = open('臻选抖店数据.txt', 'w', encoding='utf-8')
list1 = []
try:
for num in range(1, 350):
button = driver.find_element_by_xpath('//li//div//input[@type="text"]') #分别对应三个层级的标签
button.send_keys(str(num))
button.send_keys(Keys.ENTER)
time.sleep(10)
result = proxy.har
log = result["log"]
entries = log["entries"]
for entrie in entries:
response = entrie["response"]
response1 = str(response)
response2 = response1.split('post')
for line in response2:
line = line.strip()
if '_tel' in line:
try:
line1 = line.split('_tel":"')[1].split('"')[0]
line1 = line1.strip()
print(line1)
jg.write(line1+'\n')
except Exception as e:
print(e)
# for entry in result['log']['entries']:
# jg.write(str(entry))
except Exception as e:
print(e)
# for entry in result['log']['entries']:
# _url = entry['request']['url']
# # 根据URL找到数据接口
# if "/api/v2/aweme/post" in _url:
# _response = entry['response']
# _content = _response['content']['text']
# # 获取接口返回内容
# print(_content)
server.stop()
#driver.quit()
# jg1 = open('臻选未下载记录.txt', 'w', encoding='utf-8')
# jg = open('臻选抖店数据.txt', 'w', encoding='utf-8')
# f1 = open('臻选订单号.txt', 'r', encoding='utf-8')
#
#
#
jg.close()
# jg1.close()
# f1.close()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/buau654/monitoring_crawlers.git
git@gitee.com:buau654/monitoring_crawlers.git
buau654
monitoring_crawlers
监控爬虫
master

Search