1 Star 0 Fork 0

夏先生/python

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
hzzssp.py 4.30 KB
一键复制 编辑 原始数据 按行查看 历史
夏先生 提交于 2020-11-25 11:48 . 'first'
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from pyquery import PyQuery as pq
import pymysql.cursors
import re
# MySQL connection shared by the whole script
_db_settings = {
    'host': 'localhost',
    'user': 'root',
    'password': 'root',
    'db': 'py',
    'charset': 'utf8mb4',
    # DictCursor makes fetched rows come back as dicts instead of tuples.
    'cursorclass': pymysql.cursors.DictCursor,
}
connection = pymysql.connect(**_db_settings)
# Single module-level cursor used by the scraping code for inserts.
cursor = connection.cursor()
# Launch the browser
ops = webdriver.ChromeOptions()
# Chrome profile preferences; the value 2 for 'images' presumably blocks
# image loading to speed up page rendering -- confirm against Chrome's
# content-setting values.
prefs = {
    'profile.default_content_setting_values': {
        'images': 2,
    }
}
ops.add_experimental_option('prefs', prefs)
# All browser traffic is routed through this hard-coded HTTP proxy.
ops.add_argument("--proxy-server=http://116.17.102.49:3128")
# `options=` replaces the deprecated `chrome_options=` keyword, which newer
# Selenium releases no longer accept.
driver = webdriver.Chrome(options=ops)
# Log in
def login():
    """Open the site's login page in the shared ``driver`` session.

    Only the navigation is active. The automated form submission is kept
    below as disabled code.
    """
    login_url = "http://www.hezongyy.com/#/login"
    driver.get(login_url)
    # --- disabled: automated form fill and submit ---
    # try:
    # user = WebDriverWait(driver, 10).until(
    # EC.presence_of_element_located((By.CSS_SELECTOR, "#login > div.main > div > div.inputBox > form > div:nth-child(1) > div > div.el-input > input"))
    # )
    # password = WebDriverWait(driver, 10).until(
    # EC.presence_of_element_located((By.CSS_SELECTOR, "#login > div.main > div > div.inputBox > form > div:nth-child(2) > div > div.el-input.el-input--suffix > input"))
    # )
    # submit = WebDriverWait(driver, 10).until(
    # EC.presence_of_element_located((By.CSS_SELECTOR, "#login > div.main > div > div.inputBox > form > div:nth-child(4) > div > button"))
    # )
    #
    # user.send_keys('青白江百草堂')
    # password.send_keys('aa123456')
    # submit.click()
    # time.sleep(3)
    # except BaseException as e:
    # print(e)
    # driver.close()
# Read the goods list
def lists(page):
    """Scrape the first ``page`` pages of the clinic-area goods list into MySQL.

    Navigates the shared ``driver`` to the clinic area, opens the full
    listing, and for every page parses the rendered HTML with PyQuery and
    inserts one ``test_goods`` row per product through the module-level
    ``cursor`` / ``connection``.

    Args:
        page: Number of result pages to walk, starting at page 1. A value
            below 1 still performs the navigation but scrapes nothing.

    Returns:
        None. Errors raised while scraping are printed instead of being
        propagated; ``KeyboardInterrupt`` and ``SystemExit`` are not
        intercepted, so the run can still be aborted by the user.
    """
    all_goods_selector = "#app > div > div.menu > div > div.menuAll > div > div > ul.sortOneActive > li > div:nth-child(1)"
    next_button_selector = "#app > div > div.clinciList > div.container > div.page > div > button.btn-next"
    first_item_selector = "#app > div > div.clinciList > div.container > div.goodsList > ul > li:nth-child(1)"
    goods_selector = "#app > div > div.clinciList > div.container > div.goodsList > ul > li.goods"
    insert_sql = "INSERT INTO `test_goods` (`name`, `price`, `cj`, `gg`, `jzl`, `zbz`, `xq`, `kc`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    # Raw string avoids the invalid-escape warning; compiled once for all rows.
    price_pattern = re.compile(r'(\d+\.\d+)')

    try:
        # Jump to the clinic area
        driver.execute_script('location.href="https://www.hezongyy.com/#/clinicArea"')
        # Open the full clinic-area listing
        all_goods_link = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, all_goods_selector))
        )
        all_goods_link.click()
        time.sleep(3)

        for page_no in range(1, page + 1):
            print(page_no)
            if page_no != 1:
                # Page 1 is already displayed; later pages need a click on "next".
                next_button = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, next_button_selector))
                )
                next_button.click()
            # Wait until at least the first goods entry exists, then give the
            # page a moment to finish rendering before snapshotting the HTML.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, first_item_selector))
            )
            time.sleep(2)

            dom = pq(driver.page_source, parser="html")
            for item in dom(goods_selector).items():
                price_text = item.find("div > div > div.price > span").text()
                name = item.find("div > div > p.name").text()
                price_match = price_pattern.search(price_text)
                if price_match is None:
                    # Previously this raised AttributeError and aborted the
                    # whole scrape; skip just the offending row instead.
                    print("skipping %r: no decimal price in %r" % (name, price_text))
                    continue
                price = price_match.group()
                cj = item.find("div > div > p.company.height24").text()
                # The [3:] / [4:] slices drop a fixed number of leading
                # characters -- presumably a label prefix in the page text;
                # verify against the site markup.
                gg = item.find("div > div > p:nth-child(5)").text()[3:]
                xq = item.find("div > div > p:nth-child(6) > span:nth-child(1)").text()[3:]
                jzl = item.find("div > div > p:nth-child(6) > span:nth-child(2)").text()[4:]
                kc = item.find("div > div > p:nth-child(7) > span:nth-child(1)").text()[3:]
                zbz = item.find("div > div > p:nth-child(7) > span:nth-child(3)").text()[4:]

                row = (name, price, cj, gg, jzl, zbz, xq, kc)
                # Debug echo only; the real insert below is parameterized.
                print(insert_sql % row)
                cursor.execute(insert_sql, row)
                connection.commit()
    except Exception as e:
        # Best-effort scrape: report the failure and return.
        print(e)
def main():
    """Script entry point; currently it only opens the login page.

    The scraping and cleanup calls are kept below as disabled code.
    """
    login()
    # lists(2)
    # connection.close()
    # driver.close()
    # connection.close()


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/xiayen/python.git
git@gitee.com:xiayen/python.git
xiayen
python
python
master

搜索帮助