master

分支 (1)

管理

管理

master

Dota2pc
/
finallytest.py

# -*- coding: utf-8 -*-
#@Time : 2021/2/1 15:51
#@Author : Zyh
#@File : finallytest.py
#@Software : PyCharm


import re
import urllib.response,urllib.request
import urllib.error
import xlwt
from bs4 import BeautifulSoup


def main():
    """Run the scraper: collect hero page links, then one record per hero.

    The hero index page is fetched, every hero detail page found on it is
    parsed by ``getData``, and ``saveDB`` is called afterwards (it is
    currently a stub that stores nothing).
    """
    baseurl = "https://www.dota2.com.cn/heroes"
    # NOTE: the item index (https://www.dota2.com.cn/items/index.htm) is not
    # scraped yet; only heroes are processed.
    AllHtml = getAllHtml(baseurl)
    datalist = getData(AllHtml)
    saveDB()
    # Excel export is currently disabled. To re-enable it:
    # saveData(datalist, "./Dota2Hero.xls")


# ------ Regular expressions used by the scraper.
# All are compiled with re.S so that "." also matches newlines.

# Matches an <a> tag whose class attribute starts with "heroPickerIconLink"
# and captures the value of its href attribute. getAllHtml applies it to one
# anchor tag at a time.
findLink = re.compile(r'<a class="heroPickerIconLink.*" href="(.*?)".*>',re.S)

# Captures the text that follows src=" inside an <img> tag, up to the first
# "/>" after it. The pattern does not stop at the closing quote, so the
# capture can include that quote and anything between it and "/>".
findImgSrc = re.compile(r'<img.*src="(.*?)/>',re.S)

# Group 1: the text inside <span>...</span>.
# Group 2: the text between <br/> and the run of literal tab characters that
# precedes </p>.
# getData stores group 2 in the column saveData labels as the English name and
# group 1 in the column labelled as the Chinese name.
findName = re.compile(r'<p><span>(.*)</span><br/>(.*?)					</p>',re.S)


def getAllHtml(baseurl):
    """Collect the hero links found on the page at ``baseurl``.

    Every ``<a>`` tag of the page is rendered back to text and searched with
    ``findLink``; anchors that produce no match are dropped.

    Returns:
        A list of ``re.findall`` results (each a non-empty list of href
        strings), sorted case-insensitively by the first href of each entry.
    """
    page = getHtml(baseurl)
    soup = BeautifulSoup(page, "html.parser")
    # One findall result per anchor tag, evaluated lazily.
    per_anchor = (re.findall(findLink, str(anchor)) for anchor in soup.find_all("a"))
    links = [found for found in per_anchor if found]
    links.sort(key=lambda found: found[0].lower())
    return links

def getHtml(baseurl):
    """Fetch ``baseurl`` and return its body decoded as UTF-8.

    Args:
        baseurl: URL of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        with a ``urllib.error.URLError`` (which includes ``HTTPError``).
    """
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    }
    request = urllib.request.Request(baseurl, headers=head)
    html = ""
    try:
        # The context manager closes the connection even if read/decode fails.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        # ``urllib.error`` itself is a module; naming it in ``except`` raises
        # TypeError as soon as any exception reaches the handler.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html


def getData(Allhteml):
    """Download every hero page and extract one record per hero card.

    Args:
        Allhteml: iterable of sequences whose first element is the URL of a
            hero page (the shape returned by ``getAllHtml``).

    Returns:
        A list of records ``[page_url, image_url, name_group_2, name_group_1]``
        where the two name entries are the second and first capture groups of
        ``findName``. Cards in which the image or the name pattern does not
        match are skipped, so every record has exactly four entries.
    """
    imgfornt = "https://www.dota2.com.cn"
    datalist = []
    i = 0
    for item1 in Allhteml:
        link = item1[0]
        html = getHtml(link)
        soup = BeautifulSoup(html, "html.parser")
        for card in soup.find_all("div", class_="top_hero_card"):
            card = str(card)
            imgSrc = re.findall(findImgSrc, card)
            name = re.findall(findName, card)
            # Check both matches before indexing them; indexing an empty
            # result would raise IndexError.
            if not imgSrc or not name:
                continue
            # Strip every "<any char><digits>" run from the captured src text
            # (presumably to drop a version suffix -- TODO confirm).
            img = re.sub("(.[0-9]+)", "", imgSrc[0])
            datalist.append([link, imgfornt + img, name[0][1], name[0][0]])
            print("Di %d tiao" % i)
            i += 1
    print("findSuccess")
    return datalist

def saveData(datalist,savepath):
    """Write the hero records to an Excel workbook at ``savepath``.

    Row 0 holds the four column titles; record ``n`` of ``datalist`` is
    written to row ``n + 1``, using its first four entries.
    """
    headers = ("英雄链接", "英雄图片", "英雄英文名称","英雄中文名称")
    workbook = xlwt.Workbook(encoding="utf-8", style_compression=0)
    worksheet = workbook.add_sheet('Dota2Hero', cell_overwrite_ok=True)

    # Header row.
    for column, title in enumerate(headers):
        worksheet.write(0, column, title)

    # Data rows start right below the header.
    width = len(headers)
    for row_number, record in enumerate(datalist, start=1):
        for column in range(width):
            worksheet.write(row_number, column, record[column])

    workbook.save(savepath)
    print("savescuess")

def saveDB():
    """Placeholder for database persistence; does nothing and returns None."""
    return None

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()