代码拉取完成,页面将自动刷新
# -*- coding: utf-8 -*-
#@Time : 2021/2/1 15:51
#@Author : Zyh
#@File : finallytest.py
#@Software : PyCharm
import re
import urllib.response,urllib.request
import urllib.error
import xlwt
from bs4 import BeautifulSoup
def main():
    """Scrape the hero index, collect per-hero data and call the DB saver."""
    heroes_index_url = "https://www.dota2.com.cn/heroes"
    items_index_url = "https://www.dota2.com.cn/items/index.htm"  # currently unused
    db_path = ""  # currently unused; saveDB() takes no arguments
    datalist = getData(getAllHtml(heroes_index_url))
    saveDB()
    # Excel export is currently disabled:
    # saveData(datalist, "./Dota2Hero.xls")
# ------ Regular expressions used by the scraper.
# All three are compiled with re.S, so '.' also matches newlines.
# Captures the href value of an <a> tag whose class attribute starts with
# "heroPickerIconLink".
findLink = re.compile(r'<a class="heroPickerIconLink.*" href="(.*?)".*>',re.S)
# Captures everything between `src="` and the next `/>` inside an <img> tag.
# NOTE(review): the pattern has no closing quote before `/>`, so the capture
# includes the closing quote and anything else up to `/>` -- confirm intended.
findImgSrc = re.compile(r'<img.*src="(.*?)/>',re.S)
# Captures two groups from `<p><span>A</span><br/>B </p>`: group 1 is the
# <span> text, group 2 is the text after <br/> up to a space before </p>.
# Presumably group 1 is the Chinese name and group 2 the English name (based
# on the column titles in saveData) -- verify against the page markup.
findName = re.compile(r'<p><span>(.*)</span><br/>(.*?) </p>',re.S)
def getAllHtml(baseurl):
    """Collect the hero-page links found on the page at *baseurl*.

    Every <a> element of the downloaded page is matched against ``findLink``.
    Each anchor that yields at least one match contributes its list of
    captured hrefs.

    Returns:
        A list of non-empty lists of href strings, sorted case-insensitively
        by the first href of each inner list.
    """
    page = getHtml(baseurl)
    anchors = BeautifulSoup(page, "html.parser").find_all("a")
    matches_per_anchor = (re.findall(findLink, str(anchor)) for anchor in anchors)
    return sorted(
        (found for found in matches_per_anchor if found),
        key=lambda found: found[0].lower(),
    )
def getHtml(baseurl):
    """Download *baseurl* and return its body decoded as UTF-8.

    Args:
        baseurl: URL to fetch; a browser-like User-Agent header is sent.

    Returns:
        The decoded response body, or "" when the request fails with
        ``urllib.error.URLError`` (which also covers ``HTTPError``).
    """
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    }
    request = urllib.request.Request(baseurl, headers=head)
    html = ""
    try:
        # The context manager closes the response even if read/decode fails.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        # `urllib.error` is a module and cannot be used in an except clause;
        # URLError is the base class of HTTPError, so both are handled here.
        if hasattr(e, "code"):
            print(e.code)
        elif hasattr(e, "reason"):
            print(e.reason)
    return html
def getData(Allhteml):
    """Fetch every hero page and extract its link, image URL and names.

    Args:
        Allhteml: iterable of sequences whose first element is a hero page
            URL (the shape returned by ``getAllHtml``).

    Returns:
        A list of rows ``[page_url, image_url, name_group_2, name_group_1]``,
        one per "top_hero_card" div in which both ``findImgSrc`` and
        ``findName`` matched. Cards where either pattern finds nothing are
        skipped instead of raising IndexError.
    """
    img_prefix = "https://www.dota2.com.cn"
    datalist = []
    for index, entry in enumerate(Allhteml):
        page_url = entry[0]
        soup = BeautifulSoup(getHtml(page_url), "html.parser")
        for card in soup.find_all("div", class_="top_hero_card"):
            card_html = str(card)
            imgSrc = re.findall(findImgSrc, card_html)
            name = re.findall(findName, card_html)
            # Check both matches before indexing into them.
            if not imgSrc or not name:
                continue
            # Clean the captured src: drop every run of digits together with
            # the single character that precedes it.
            img = re.sub("(.[0-9]+)", "", imgSrc[0])
            datalist.append([page_url, img_prefix + img, name[0][1], name[0][0]])
        # Progress is reported once per hero page, counting from 0.
        print("Di %d tiao" % index)
    print("findSuccess")
    return datalist
def saveData(datalist,savepath):
    """Write the scraped hero rows to an .xls workbook at *savepath*.

    Row 0 holds the four column titles. Each entry of *datalist* fills the
    next row using its first four elements.
    """
    headers = ("英雄链接", "英雄图片", "英雄英文名称","英雄中文名称")
    workbook = xlwt.Workbook(encoding="utf-8", style_compression=0)
    worksheet = workbook.add_sheet('Dota2Hero', cell_overwrite_ok=True)
    for column, title in enumerate(headers):
        worksheet.write(0, column, title)
    column_count = len(headers)
    # Data rows start at sheet row 1, directly below the header row.
    for row_number, record in enumerate(datalist, start=1):
        for column in range(column_count):
            worksheet.write(row_number, column, record[column])
    workbook.save(savepath)
    print("savescuess")
def saveDB():
    """Placeholder for database persistence; currently does nothing."""
    return None
# Run the scraper only when this file is executed as a script.
if "__main__" == __name__:
    main()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。