代码拉取完成,页面将自动刷新
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from pyquery import PyQuery as pq
import pymysql.cursors
import re
# MySQL connection shared by the whole script.
# DictCursor makes fetched rows come back as dicts instead of tuples.
connection = pymysql.connect(
    host='localhost',
    user='root',
    password='root',
    db='py',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
# Single module-level cursor reused for every INSERT issued by lists().
cursor = connection.cursor()
# Launch the browser.
ops = webdriver.ChromeOptions()
prefs = {
    # NOTE(review): presumably disables image loading to speed up page
    # loads (Chrome content-setting value 2) -- confirm.
    'profile.default_content_setting_values': {
        'images': 2,
    }
}
ops.add_experimental_option('prefs', prefs)
# All browser traffic is routed through this hard-coded HTTP proxy.
ops.add_argument("--proxy-server=http://116.17.102.49:3128")
# `chrome_options=` is the deprecated spelling of this keyword and is no
# longer accepted by newer Selenium releases; `options=` is the replacement.
driver = webdriver.Chrome(options=ops)
# Login
def login():
    """Open the site's login page in the shared module-level browser.

    Only the navigation is active. The form-filling steps are kept below as
    commented-out code, so this function does not submit any credentials.
    """
    login_url = "http://www.hezongyy.com/#/login"
    driver.get(login_url)
    # Disabled automated sign-in flow, kept for reference:
    # try:
    #     user = WebDriverWait(driver, 10).until(
    #         EC.presence_of_element_located((By.CSS_SELECTOR, "#login > div.main > div > div.inputBox > form > div:nth-child(1) > div > div.el-input > input"))
    #     )
    #     password = WebDriverWait(driver, 10).until(
    #         EC.presence_of_element_located((By.CSS_SELECTOR, "#login > div.main > div > div.inputBox > form > div:nth-child(2) > div > div.el-input.el-input--suffix > input"))
    #     )
    #     submit = WebDriverWait(driver, 10).until(
    #         EC.presence_of_element_located((By.CSS_SELECTOR, "#login > div.main > div > div.inputBox > form > div:nth-child(4) > div > button"))
    #     )
    #
    #     user.send_keys('青白江百草堂')
    #     password.send_keys('aa123456')
    #     submit.click()
    #     time.sleep(3)
    # except BaseException as e:
    #     print(e)
    #     driver.close()
# Read the goods list
def lists(page):
    """Scrape the first `page` pages of the clinic-area goods list into MySQL.

    Navigates the shared `driver` to the clinic area, opens the full listing,
    then for each page parses the rendered HTML with PyQuery and inserts one
    row per goods item into `test_goods`, committing after every row.

    Args:
        page: Number of listing pages to walk, starting from page 1.

    Any exception raised while scraping is printed and ends the scrape; rows
    already committed stay in the database. KeyboardInterrupt and SystemExit
    are deliberately not caught so the script can still be interrupted.
    """
    # Loop-invariant pieces hoisted out of the per-item loop.
    price_pattern = re.compile(r'(\d+\.\d+)')
    sql = "INSERT INTO `test_goods` (`name`, `price`, `cj`, `gg`, `jzl`, `zbz`, `xq`, `kc`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    try:
        # Jump to the clinic area.
        driver.execute_script('location.href="https://www.hezongyy.com/#/clinicArea"')
        # Open the full clinic-area listing.
        show_all = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#app > div > div.menu > div > div.menuAll > div > div > ul.sortOneActive > li > div:nth-child(1)"))
        )
        show_all.click()
        time.sleep(3)
        for page_no in range(1, page + 1):
            print(page_no)
            if page_no != 1:
                # Every page after the first is reached via the "next" button.
                next_button = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "#app > div > div.clinciList > div.container > div.page > div > button.btn-next"))
                )
                next_button.click()
            # Wait until at least the first goods entry is present, then give
            # the page a moment to finish rendering before reading the source.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "#app > div > div.clinciList > div.container > div.goodsList > ul > li:nth-child(1)"))
            )
            time.sleep(2)
            dom = pq(driver.page_source, parser="html")
            goods_items = dom("#app > div > div.clinciList > div.container > div.goodsList > ul > li.goods").items()
            for item in goods_items:
                price_text = item.find("div > div > div.price > span").text()
                name = item.find("div > div > p.name").text()
                price_match = price_pattern.search(price_text)
                if price_match is None:
                    # Without a decimal price there is nothing valid to store;
                    # skip this item instead of aborting the whole scrape.
                    print("skipping item without a decimal price: %r" % (name,))
                    continue
                price = price_match.group()
                cj = item.find("div > div > p.company.height24").text()
                # The slices drop the first 3 or 4 characters of each text --
                # presumably a label prefix in front of the value; confirm
                # against the live page markup.
                gg = item.find("div > div > p:nth-child(5)").text()[3:]
                xq = item.find("div > div > p:nth-child(6) > span:nth-child(1)").text()[3:]
                jzl = item.find("div > div > p:nth-child(6) > span:nth-child(2)").text()[4:]
                kc = item.find("div > div > p:nth-child(7) > span:nth-child(1)").text()[3:]
                zbz = item.find("div > div > p:nth-child(7) > span:nth-child(3)").text()[4:]
                row = (name, price, cj, gg, jzl, zbz, xq, kc)
                # Debug echo only; the real query below is parameterized.
                print(sql % row)
                cursor.execute(sql, row)
                connection.commit()
    except Exception as e:
        print(e)
def main():
    """Script entry point: currently only opens the login page.

    The scraping call and the resource clean-up are kept below as
    commented-out code, so the browser and the database connection stay
    open after this function returns.
    """
    login()
    # Disabled steps, kept for reference:
    # lists(2)
    # connection.close()
    # driver.close()
    # connection.close()


if __name__ == '__main__':
    main()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。