master

分支 (1)

管理

管理

master

crawler-operation
/
lesson14

第一题
import scrapy


class A1Spider(scrapy.Spider):
    """Spider that fetches the imooc course list page and yields one item per row.

    The i-th item combines the i-th match of each of the four XPath queries
    run in ``parse``.
    """

    name = 'a1'
    # allowed_domains expects bare host names, not URLs.
    allowed_domains = ['www.imooc.com']
    # The previous value had a doubled scheme ("http://https://...") and
    # could not be fetched.
    start_urls = ['https://www.imooc.com/course/list']

    def parse(self, response):
        """Extract one flat dict per row of matched elements.

        Args:
            response: The Scrapy response for a start URL.

        Yields:
            dict: Keys ``img_style``, ``title``, ``one`` and ``two``. The key
            names mirror the CSS classes of the elements each value is read
            from (``div.img``, ``p.title``, ``p.one``, ``p.two``).
        """
        img_styles = response.xpath("//div[@class='img']/@style").getall()
        titles = response.xpath("//p[@class='title ellipsis2']/text()").getall()
        ones = response.xpath("//p[@class='one']/text()").getall()
        twos = response.xpath("//p[@class='two clearfix']/span/text()").getall()

        # zip() stops at the shortest list, so a row is only emitted when all
        # four queries produced a value at that position.
        for img_style, title, one, two in zip(img_styles, titles, ones, twos):
            # Yield a single dict: Scrapy accepts Requests, items (dicts) or
            # None from a callback; a tuple of dicts is rejected.
            yield {
                'img_style': img_style,
                'title': title,
                'one': one,
                'two': two,
            }


第二题
 def parse(self, response):
     """Print the response URL and three groups of values scraped from it.

     Printed, in order: the URL, the list of ``li/h2/a`` link texts, the
     second text node of every ``li/span`` with its first character
     dropped (one per line), and the list of ``li/a/img`` ``src`` values.

     Args:
         response: Object exposing ``url`` and ``xpath(query).getall()``.

     Returns:
         None. All output goes to stdout.
     """
     print(response.url)
     link_texts = response.xpath("//li/h2/a/text()").getall()
     span_texts = response.xpath("//li/span/text()[2]").getall()
     image_sources = response.xpath("//li/a/img//@src").getall()

     print(link_texts)
     for text in span_texts:
         # Skip the leading character of each value before printing.
         print(text[1:])
     print(image_sources)


第三题
 def parse(self, response):
     """Print the response URL, then yield one dict per row of matches.

     Args:
         response: Object exposing ``url`` and ``xpath(query).getall()``.

     Yields:
         dict: Keys ``title``, ``house_info``, ``total_price`` and
         ``unit_price``, named after the CSS classes the values are read
         from (``title``, ``houseInfo``, ``totalPrice``, ``unitPrice``).
     """
     print(response.url)
     titles = response.xpath("//div[@class='title']/a/text()").getall()
     house_infos = response.xpath("//div[@class='houseInfo']/text()").getall()
     total_prices = response.xpath(
         "//div[@class='totalPrice totalPrice2']/span/text()"
     ).getall()
     unit_prices = response.xpath("//div[@class='unitPrice']/span/text()").getall()

     # zip() stops at the shortest list, so a row is only emitted when all
     # four queries produced a value at that position.
     rows = zip(titles, house_infos, total_prices, unit_prices)
     for title, house_info, total_price, unit_price in rows:
         # Yield a single dict: Scrapy accepts Requests, items (dicts) or
         # None from a callback; a tuple of dicts is rejected.
         yield {
             'title': title,
             'house_info': house_info,
             'total_price': total_price,
             'unit_price': unit_price,
         }


第四题
def parse(self, response):
    """Print the response URL and several groups of values scraped from it.

    Printed, in order:
      1. the response URL;
      2. the list of texts of the second ``p`` under ``li.item/a``;
      3. one line per pair of (``comm_box`` span text, ``p.score`` text),
         joined by a single space;
      4. the list of ``p.comm_num`` texts;
      5. one line per triple of ``p.price`` parts (``span.y.rmb`` text,
         direct text, ``span.qi`` text), concatenated without separator.

    Paired values are matched by position. Pairing stops at the shortest
    list, so a page with a missing element no longer raises IndexError.

    Args:
        response: Object exposing ``url`` and ``xpath(query).getall()``.

    Returns:
        None. All output goes to stdout.
    """
    print(response.url)
    item_texts = response.xpath("//li[@class='item']/a/p[2]/text()").getall()
    comm_spans = response.xpath(
        "//div[@class='comm_box clearfix']/p/span/text()"
    ).getall()
    scores = response.xpath(
        "//div[@class='comm_box clearfix']/p[@class='score']/text()"
    ).getall()
    comm_nums = response.xpath(
        "//div[@class='comm_box clearfix']/p[@class='comm_num']/text()"
    ).getall()
    price_rmb = response.xpath(
        "//p[@class='price']/span[@class='y rmb']/text()"
    ).getall()
    price_texts = response.xpath("//p[@class='price']/text()").getall()
    price_qi = response.xpath("//p[@class='price']/span[@class='qi']/text()").getall()

    print(item_texts)
    for span_text, score in zip(comm_spans, scores):
        print(f"{span_text} {score}")
    print(comm_nums)
    for rmb, amount, qi in zip(price_rmb, price_texts, price_qi):
        print(f"{rmb}{amount}{qi}")


第五题
def parse(self, response):
    """Print the response URL, then name, team and points for up to 50 players.

    The JSON body is read via ``response.json()`` and the player list is
    taken from ``payload.players``. For each of the first 50 entries (or
    fewer, if the list is shorter) three lines are printed:
    ``playerProfile.displayName``, ``teamProfile.displayAbbr`` and
    ``statAverage.pointsPg``.

    Args:
        response: Object exposing ``url`` and ``json()``.

    Returns:
        None. All output goes to stdout.

    Raises:
        KeyError: If an expected key is missing from the JSON document.
    """
    print(response.url)
    # Assumes payload.players is a list (the original indexed it by integer).
    players = response.json()['payload']['players']
    # Slice instead of range(50) so a shorter list does not raise IndexError.
    for player in players[:50]:
        print(player['playerProfile']['displayName'])
        print(player['teamProfile']['displayAbbr'])
        print(player['statAverage']['pointsPg'])