代码拉取完成,页面将自动刷新
第一题
import scrapy
class A1Spider(scrapy.Spider):
    """Spider for the imooc.com course list page.

    ``parse`` yields one dict per group of values extracted from the page.
    """

    name = 'a1'
    # allowed_domains takes bare domain names; Scrapy ignores entries that
    # are full URLs (and logs a warning), so only the host is listed here.
    allowed_domains = ['www.imooc.com']
    # A start URL needs exactly one scheme; "http://https://..." cannot be
    # fetched.
    start_urls = ['https://www.imooc.com/course/list/']

    def parse(self, response):
        """Yield one dict per row of values extracted from ``response``.

        Args:
            response: The downloaded page; queried via ``xpath(...).getall()``.

        Yields:
            dict: Keys ``image_style``, ``title``, ``one`` and ``two``. The
            last two are named after the CSS classes of the ``<p>`` elements
            they are read from.
        """
        image_styles = response.xpath("//div[@class='img']/@style").getall()
        titles = response.xpath("//p[@class='title ellipsis2']/text()").getall()
        one_texts = response.xpath("//p[@class='one']/text()").getall()
        two_texts = response.xpath("//p[@class='two clearfix']/span/text()").getall()

        # zip() stops at the shortest list, so values are paired by position
        # and any surplus entries in the longer lists are dropped.
        rows = zip(image_styles, titles, one_texts, two_texts)
        for image_style, title, one_text, two_text in rows:
            # A callback has to yield a single item (dict/Item) or a Request.
            # Yielding "{...}, {...}" builds a tuple, which Scrapy rejects.
            yield {
                'image_style': image_style,
                'title': title,
                'one': one_text,
                'two': two_text,
            }
第二题
def parse(self, response):
    """Print the values extracted from ``response`` for inspection.

    Prints, in order: the response URL, the list of ``<h2>`` link texts,
    each second text node of ``//li/span`` with its first character removed,
    and the list of image ``src`` attributes.

    Args:
        response: Object exposing ``url`` and ``xpath(query).getall()``.
    """
    print(response.url)
    names = response.xpath("//li/h2/a/text()").getall()
    numbers = response.xpath("//li/span/text()[2]").getall()
    image_srcs = response.xpath("//li/a/img//@src").getall()

    print(names)
    for number in numbers:
        # Drop the leading character of each text node before printing.
        print(number[1:])
    # NOTE(review): the source lost its indentation; this print is assumed
    # to run once after the loop rather than on every iteration.
    print(image_srcs)
第三题
def parse(self, response):
    """Yield one dict per listing extracted from ``response``.

    The response URL is printed before the first item is produced.

    Args:
        response: Object exposing ``url`` and ``xpath(query).getall()``.

    Yields:
        dict: Keys ``title``, ``house_info``, ``total_price`` and
        ``unit_price``, named after the CSS classes the values are read from.
    """
    print(response.url)
    titles = response.xpath("//div[@class='title']/a/text()").getall()
    house_infos = response.xpath("//div[@class='houseInfo']/text()").getall()
    total_prices = response.xpath("//div[@class='totalPrice totalPrice2']/span/text()").getall()
    unit_prices = response.xpath("//div[@class='unitPrice']/span/text()").getall()

    # zip() stops at the shortest list, so values are paired by position.
    rows = zip(titles, house_infos, total_prices, unit_prices)
    for title, house_info, total_price, unit_price in rows:
        # A callback has to yield a single item (dict/Item) or a Request.
        # Yielding "{...}, {...}" builds a tuple, which Scrapy rejects.
        yield {
            'title': title,
            'house_info': house_info,
            'total_price': total_price,
            'unit_price': unit_price,
        }
第四题
def parse(self, response):
    """Print the values extracted from ``response`` for inspection.

    Prints, in order: the response URL, the list of item names, one
    "<span text> <score text>" line per pair, the list of ``comm_num``
    texts, and one concatenated price line per triple of price parts.

    Args:
        response: Object exposing ``url`` and ``xpath(query).getall()``.
    """
    print(response.url)
    names = response.xpath("//li[@class='item']/a/p[2]/text()").getall()
    span_texts = response.xpath("//div[@class='comm_box clearfix']/p/span/text()").getall()
    score_texts = response.xpath("//div[@class='comm_box clearfix']/p[@class='score']/text()").getall()
    comm_num_texts = response.xpath("//div[@class='comm_box clearfix']/p[@class='comm_num']/text()").getall()
    rmb_texts = response.xpath("//p[@class='price']/span[@class='y rmb']/text()").getall()
    price_texts = response.xpath("//p[@class='price']/text()").getall()
    qi_texts = response.xpath("//p[@class='price']/span[@class='qi']/text()").getall()

    print(names)
    # Pair by position with zip(): indexing one list by another list's length
    # raises IndexError as soon as the lists differ in size.
    for span_text, score_text in zip(span_texts, score_texts):
        print(span_text + " " + score_text)
    # NOTE(review): the source lost its indentation; this print is assumed
    # to run once after the loop rather than on every iteration.
    print(comm_num_texts)
    for rmb_text, price_text, qi_text in zip(rmb_texts, price_texts, qi_texts):
        print(rmb_text + price_text + qi_text)
第五题
def parse(self, response):
    """Print name, team abbreviation and points for up to 50 players.

    The response URL is printed first. The JSON body is read through
    ``response.json()`` and is expected to hold a list at
    ``payload -> players``; each entry is read at
    ``playerProfile.displayName``, ``teamProfile.displayAbbr`` and
    ``statAverage.pointsPg``.

    Args:
        response: Object exposing ``url`` and ``json()``.

    Raises:
        KeyError: If an expected key is missing from the payload.
    """
    print(response.url)
    data = response.json()
    # Slice instead of indexing range(50): a payload with fewer than 50
    # players is printed in full rather than raising IndexError.
    for player in data['payload']['players'][:50]:
        print(player['playerProfile']['displayName'])
        print(player['teamProfile']['displayAbbr'])
        print(player['statAverage']['pointsPg'])
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容不涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违反国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。