1 Star 1 Fork 1

李赟辉/梨视频短视频抓取

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
downloads_video.py 3.75 KB
一键复制 编辑 原始数据 按行查看 历史
17611666527 提交于 2020-05-05 22:36 . 梨视频短视频抓取
import re
import os
import requests
from urllib.request import urlretrieve
from setting import start_url
from setting import category
# HTTP headers passed as ``headers=`` to every ``requests.get`` call in this
# script.
# NOTE(review): the Access-Control-* and Content-Encoding entries are normally
# response headers; it is unclear whether the target site needs them on a GET
# request - confirm before removing.
# NOTE(review): the Cookie value is a hard-coded captured browser session and
# looks like it embeds personal identifiers; consider loading it from
# configuration instead of committing it - TODO confirm it is still required.
header = {
'Access-Control-Allow-Credentials': 'true',
'Access-Control-Allow-Methods': 'GET,HEAD,PUT,POST,DELETE',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Encoding': 'gzip',
'Content-Type': 'application/json;charset=UTF-8',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
# The adjacent string literals below are concatenated by Python into one
# single Cookie header value.
'Cookie': '__mta=251081401.1572934390047.1574775365015.1574775365018.30; '
'_lxsdk_cuid=16b88512b3fc8-057b10a5bdb7e7-e343166-15f900-16b88512b3fc8; '
'_lxsdk=16b88512b3fc8-057b10a5bdb7e7-e343166-15f900-16b88512b3fc8; '
'_hc.v=183cbc21-7956-a2b2-824f-a4c810ea98a1.1561360150; s_ViewType=10; aburl=1; '
'_dp.ac.v=d20caa18-0d30-4a21-8641-bc4a673ddb06; '
'ctu=02ae32092e43de1eab6ed9dfdc3a765f976569575ed66b947d717def70ba6012; '
'ua=%E6%97%B1%E5%9C%B0%E6%9C%89%E6%A0%B9%E8%83%A1%E8%90%9D%E5%8D%9C; '
'uudid=cms41448392-3c4a-b185-3248-267203d258ea; __utma=1.1256113168.1575279313.1575279313.1575279313.1; '
'__utmz=1.1575279313.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); '
'Hm_lvt_dbeeb675516927da776beeb1d9802bd4=1574775348,1574910357,1575366638,1575451098; '
'radius=223.192.87.133; cy=219; cye=dongguan; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; '
'dper=36092ccdaea90c4337e1ad6f901f3bb9f74a8aee3a0d912b18047b51352d0730ccc138dbb340ebe57e1eb655c3a3e9af7577e979acc5478cc386c427747717a3c465e1bfee27aa6a941a09615b3b2f1210263ae7388116e462be5767885e7d03; ll=7fd06e815b796be3df069dec7836c3df; uamo=17611666527; _lxsdk_s=16efcbceecc-e9-ec4-dbb%7C%7C689 '
}
def Schedule(blocknum, blocksize, totalsize):
    """Print download progress; used as the ``urlretrieve`` report hook.

    Args:
        blocknum: Number of data blocks transferred so far.
        blocksize: Size of one data block in bytes.
        totalsize: Size of the remote file in bytes. ``urlretrieve`` passes
            -1 when the server does not announce a size, and a server may
            also report 0.
    """
    if totalsize <= 0:
        # Without a usable total a percentage cannot be computed: dividing
        # would raise ZeroDivisionError (0) or give a negative value (-1).
        # Report the amount received so far instead.
        print('已下载:%d 字节' % (blocknum * blocksize))
        return
    # The last block is usually only partly filled, so cap the value at 100.
    per = min(100.0, 100.0 * blocknum * blocksize / totalsize)
    print('当前下载进度:%d' % per)
# Collect the video links listed on the requested page.
def get_video_id(url):
    """Fetch a listing page and return the video links found in it.

    Args:
        url: Address of the listing page to request.

    Returns:
        A list of the ``video_...`` href values matched in the page text,
        or ``None`` when nothing matches.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # A timeout keeps one stalled connection from hanging the crawl forever.
    page = requests.get(url=url, headers=header, timeout=30).text
    video_id_list = re.findall(
        r'<a href="(video_.+?)" class="vervideo-lilink actplay">', page
    )
    # Callers test the result for truthiness; keep returning None when empty.
    return video_id_list or None
# Translation table deleting the characters stripped from video titles
# (the ones Windows does not allow in file names).
_TITLE_FORBIDDEN = str.maketrans('', '', '"/\\:*?<>|')


# Resolve the download address and the title of one video page.
def get_video_url(url):
    """Fetch a video page and extract its mp4 address and title.

    Args:
        url: Address of the video detail page.

    Returns:
        A ``(video_url, video_name)`` tuple: the first ``srcUrl="...mp4"``
        value in the page, and the text of the first
        ``<h1 class="video-tt">`` element with the characters
        ``" / \\ : * ? < > |`` removed.

    Raises:
        IndexError: If the page contains no mp4 address or no title.
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # A timeout keeps one stalled connection from hanging the crawl forever.
    page = requests.get(url=url, headers=header, timeout=30).text
    # The dot before "mp4" is escaped so that it only matches a literal ".".
    url_match = re.search(r'srcUrl="(.+?\.mp4)"', page)
    name_match = re.search(r'<h1 class="video-tt">(.+?)</h1>', page)
    if url_match is None or name_match is None:
        # Same exception type as the former ``findall(...)[0]`` lookup, but
        # with a message that says which page could not be parsed.
        raise IndexError('no mp4 address or title found in page: %s' % url)
    video_name = name_match.group(1).translate(_TITLE_FORBIDDEN)
    return url_match.group(1), video_name
# Request listing pages in a loop and download each video to local disk.
def run(save_dir=r'D:\code\梨视频\video'):
    """Crawl the configured category page by page and download its videos.

    The listing address is built from ``start_url`` by replacing ``$`` with
    ``category`` and ``#`` with the current offset, which grows by 12 after
    each page. The loop ends when a page yields no video links.

    Args:
        save_dir: Directory that receives the ``<title>.mp4`` files. It is
            created when missing. Defaults to the location the script has
            always used.
    """
    # urlretrieve does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)
    num = 0
    while True:
        req_url = start_url.replace('$', category).replace('#', str(num))
        print('当前请求地址{}'.format(req_url))
        video_id_list = get_video_id(req_url)
        if not video_id_list:
            print('当前分类数据爬取完毕!')
            break
        for video in video_id_list:
            url = 'https://www.pearvideo.com/' + video
            video_url, video_name = get_video_url(url)
            path = os.path.join(save_dir, '{}.mp4'.format(video_name))
            if os.path.exists(path):
                # Already downloaded on an earlier run.
                continue
            # Download under a temporary name and rename on success, so an
            # interrupted transfer is not mistaken for a finished file by
            # the existence check above on the next run.
            part_path = path + '.part'
            urlretrieve(video_url, part_path, Schedule)
            os.replace(part_path, path)
        print('下拉继续获取。。。')
        num += 12
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    run()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/love_yun/pear_video.git
git@gitee.com:love_yun/pear_video.git
love_yun
pear_video
梨视频短视频抓取
master

搜索帮助