1 Star 0 Fork 0

周欣/爬虫zy

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
12.03 3.19 KB
一键复制 编辑 原始数据 按行查看 历史
周欣 提交于 2021-12-08 02:12 . add 12.03.
1.
import scrapy
class LessonSpider(scrapy.Spider):
    """Fetch the imooc course-list page and print a few XPath selections.

    The spider requests the single URL in ``start_urls`` and, for the
    response, prints the HTTP status followed by four lists of strings
    extracted with XPath.
    """

    name = 'lesson'
    # allowed_domains must contain bare domain names. Scrapy ignores entries
    # that are full URLs (and logs a warning), which silently disables the
    # offsite filter, so only the host is listed here.
    allowed_domains = ['www.imooc.com']
    start_urls = ['https://www.imooc.com/course/list']

    def parse(self, response) -> None:
        """Print the status code and the extracted values of ``response``.

        Args:
            response: The downloaded page handed over by Scrapy.
        """
        print(response.status)

        # ``style`` attribute of every <div class="img">.
        # NOTE(review): presumably carries the cover image URL - confirm.
        img_styles = response.xpath("//div[@class='img']/@style").getall()
        print(img_styles)

        # Text of every <p class="title ellipsis2">.
        titles = response.xpath("//p[@class='title ellipsis2']/text()").getall()
        print(titles)

        # Text of the second <p> inside every <a class="item free ">.
        # The trailing space in the class value is part of the exact match.
        second_paragraphs = response.xpath("//a[@class='item free ']/p[2]/text()").getall()
        print(second_paragraphs)

        # Text of every <span class="l">.
        left_spans = response.xpath("//span[@class='l']/text()").getall()
        print(left_spans)
2.
import scrapy
class LessonSpider(scrapy.Spider):
    """Fetch the 1ppt.com template listing and print a few XPath selections.

    The spider requests the single URL in ``start_urls`` and, for the
    response, prints the HTTP status followed by three lists of strings
    extracted with XPath.
    """

    name = 'lesson'
    # allowed_domains must contain bare domain names. Scrapy ignores entries
    # that are full URLs (and logs a warning), which silently disables the
    # offsite filter, so only the host is listed here.
    allowed_domains = ['www.1ppt.com']
    start_urls = ['http://www.1ppt.com/moban/']

    def parse(self, response) -> None:
        """Print the status code and the extracted values of ``response``.

        Args:
            response: The downloaded page handed over by Scrapy.
        """
        print(response.status)

        # Link text of the <h2> headings inside <dl class="dlbox"> list items.
        heading_texts = response.xpath("//dl[@class='dlbox']/dd/ul/li/h2/a/text()").getall()
        print(heading_texts)

        # Second text node of each <span> under <ul class="tplist"> items.
        # NOTE(review): presumably a download counter - confirm on the page.
        span_texts = response.xpath("//ul[@class='tplist']/li/span/text()[2]").getall()
        print(span_texts)

        # ``src`` attribute of the thumbnail <img> in each tplist item.
        image_sources = response.xpath("//ul[@class='tplist']/li/a/img/@src").getall()
        print(image_sources)
3.
import scrapy
class LessonSpider(scrapy.Spider):
    """Fetch the jh.lianjia.com listing page and print a few XPath selections.

    The spider requests the single URL in ``start_urls`` and, for the
    response, prints the HTTP status followed by four lists of strings
    extracted with XPath.
    """

    name = 'lesson'
    # allowed_domains must contain bare domain names. Scrapy ignores entries
    # that are full URLs (and logs a warning), which silently disables the
    # offsite filter, so only the host is listed here.
    allowed_domains = ['jh.lianjia.com']
    start_urls = ['https://jh.lianjia.com/ershoufang/']

    def parse(self, response) -> None:
        """Print the status code and the extracted values of ``response``.

        Args:
            response: The downloaded page handed over by Scrapy.
        """
        print(response.status)

        # Link text inside every <div class="title">.
        titles = response.xpath("//div[@class='title']/a/text()").getall()
        print(titles)

        # Direct text of every <div class="houseInfo">.
        house_infos = response.xpath("//div[@class='houseInfo']/text()").getall()
        print(house_infos)

        # <span> text inside every <div class="totalPrice totalPrice2">.
        total_prices = response.xpath("//div[@class='totalPrice totalPrice2']/span/text()").getall()
        print(total_prices)

        # <span> text inside every <div class="unitPrice">.
        unit_prices = response.xpath("//div[@class='unitPrice']/span/text()").getall()
        print(unit_prices)
4.
import scrapy
class LessonSpider(scrapy.Spider):
    """Fetch the hotel.qunar.com front page and print a few XPath selections.

    The spider requests the single URL in ``start_urls`` and, for the
    response, prints the HTTP status followed by four lists of strings
    extracted with XPath.
    """

    name = 'lesson'
    # allowed_domains must contain bare domain names. Scrapy ignores entries
    # that are full URLs (and logs a warning), which silently disables the
    # offsite filter, so only the host is listed here.
    allowed_domains = ['hotel.qunar.com']
    start_urls = ['https://hotel.qunar.com/']

    def parse(self, response) -> None:
        """Print the status code and the extracted values of ``response``.

        Args:
            response: The downloaded page handed over by Scrapy.
        """
        print(response.status)

        # Text of every <p class="name text-elli-one">.
        names = response.xpath("//p[@class='name text-elli-one']/text()").getall()
        print(names)

        # <span> text inside every <p class="score">.
        scores = response.xpath("//p[@class='score']/span/text()").getall()
        print(scores)

        # Text of every <p class="comm_num">.
        # NOTE(review): presumably the number of comments - confirm.
        comment_counts = response.xpath("//p[@class='comm_num']/text()").getall()
        print(comment_counts)

        # Text of every <p class="price">.
        prices = response.xpath("//p[@class='price']/text()").getall()
        print(prices)
5.
import scrapy
class LessonSpider(scrapy.Spider):
    """Fetch the china.nba.com player-stats JSON and print per-player fields.

    The spider requests the single URL in ``start_urls``, decodes the body
    as JSON and prints three values for each of the first 50 players.
    """

    name = 'lesson'
    # allowed_domains must contain bare domain names. Scrapy ignores entries
    # that are full URLs (and logs a warning), which silently disables the
    # offsite filter, so only the host is listed here.
    allowed_domains = ['china.nba.com']
    start_urls = ['https://china.nba.com/static/data/league/playerstats_All_All_All_0_All_false_2021_2_All_Team_points_All_perGame.json']

    def parse(self, response) -> None:
        """Print the status code and the leading players of the JSON payload.

        For each player the display name, the team abbreviation and the
        ``pointsPg`` average are printed on separate lines, in that order.

        Args:
            response: The downloaded JSON document handed over by Scrapy.

        Raises:
            KeyError: If the payload lacks one of the expected keys.
        """
        print(response.status)
        data = response.json()
        players = data['payload']['players']

        # Slice instead of indexing 0..49 so that a payload with fewer than
        # 50 entries prints what is available instead of raising IndexError.
        for player in players[:50]:
            print(player['playerProfile']['displayName'])
            print(player['teamProfile']['displayAbbr'])
            print(player['statAverage']['pointsPg'])
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/zhou-xin607/zx2.git
git@gitee.com:zhou-xin607/zx2.git
zhou-xin607
zx2
爬虫zy
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385