代码拉取完成,页面将自动刷新
同步操作将从 李赟辉/梨视频短视频抓取 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
import re
import os
import requests
from urllib.request import urlretrieve
from setting import start_url
from setting import category
# HTTP headers passed to every requests.get() call made by get_video_id()
# and get_video_url().
# NOTE(review): 'Access-Control-Allow-Credentials', 'Access-Control-Allow-Methods'
# and 'Content-Encoding' are normally *response* headers, and 'Content-Type'
# describes a request body that these GET requests do not send. They look
# copied from a browser capture -- confirm whether the site needs any of them.
header = {
'Access-Control-Allow-Credentials': 'true',
'Access-Control-Allow-Methods': 'GET,HEAD,PUT,POST,DELETE',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Encoding': 'gzip',
'Content-Type': 'application/json;charset=UTF-8',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
# NOTE(review): hard-coded browser cookie string. It appears to carry
# session/login tokens and a phone-number-like value ('uamo'); confirm it is
# actually required, and if so load it from configuration instead of keeping
# it in source control.
'Cookie': '__mta=251081401.1572934390047.1574775365015.1574775365018.30; '
'_lxsdk_cuid=16b88512b3fc8-057b10a5bdb7e7-e343166-15f900-16b88512b3fc8; '
'_lxsdk=16b88512b3fc8-057b10a5bdb7e7-e343166-15f900-16b88512b3fc8; '
'_hc.v=183cbc21-7956-a2b2-824f-a4c810ea98a1.1561360150; s_ViewType=10; aburl=1; '
'_dp.ac.v=d20caa18-0d30-4a21-8641-bc4a673ddb06; '
'ctu=02ae32092e43de1eab6ed9dfdc3a765f976569575ed66b947d717def70ba6012; '
'ua=%E6%97%B1%E5%9C%B0%E6%9C%89%E6%A0%B9%E8%83%A1%E8%90%9D%E5%8D%9C; '
'uudid=cms41448392-3c4a-b185-3248-267203d258ea; __utma=1.1256113168.1575279313.1575279313.1575279313.1; '
'__utmz=1.1575279313.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); '
'Hm_lvt_dbeeb675516927da776beeb1d9802bd4=1574775348,1574910357,1575366638,1575451098; '
'radius=223.192.87.133; cy=219; cye=dongguan; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; '
'dper=36092ccdaea90c4337e1ad6f901f3bb9f74a8aee3a0d912b18047b51352d0730ccc138dbb340ebe57e1eb655c3a3e9af7577e979acc5478cc386c427747717a3c465e1bfee27aa6a941a09615b3b2f1210263ae7388116e462be5767885e7d03; ll=7fd06e815b796be3df069dec7836c3df; uamo=17611666527; _lxsdk_s=16efcbceecc-e9-ec4-dbb%7C%7C689 '
}
def Schedule(blocknum, blocksize, totalsize):
    """Print download progress; intended as the ``reporthook`` of ``urlretrieve``.

    Args:
        blocknum: Number of data blocks transferred so far.
        blocksize: Size of one data block, in bytes.
        totalsize: Total size of the remote file, in bytes. May be zero or
            negative when the server does not report a size.
    """
    if totalsize <= 0:
        # Size unknown: a percentage is meaningless here, and dividing by a
        # zero size would raise ZeroDivisionError. Report raw bytes instead.
        print('当前下载进度:已下载 %d 字节' % (blocknum * blocksize))
        return
    # The last block usually overshoots the file size, so cap at 100.
    per = min(100.0, 100.0 * blocknum * blocksize / totalsize)
    print('当前下载进度:%d' % per)
# Collect the video ids linked from the requested listing page
def get_video_id(url, timeout=10):
    """Fetch a listing page and return the video links found in it.

    Args:
        url: Address of the listing page to request.
        timeout: Seconds to wait on the server before ``requests`` raises;
            without a timeout a stalled connection blocks the crawl forever.

    Returns:
        A non-empty list of relative links of the form ``video_...``, or
        ``None`` when the page contains no matching link.
    """
    resp = requests.get(url=url, headers=header, timeout=timeout).text
    video_id_list = re.findall(
        r'<a href="(video_.+?)" class="vervideo-lilink actplay">', resp)
    # An empty result is reported as None, exactly as before.
    return video_id_list or None
# Extract the video file address and a filesystem-safe title from a video page
def get_video_url(url, timeout=10):
    """Fetch a video page and return ``(video_url, video_name)``.

    Args:
        url: Address of the video page to request.
        timeout: Seconds to wait on the server before ``requests`` raises;
            without a timeout a stalled connection blocks the crawl forever.

    Returns:
        A tuple of the first ``srcUrl="...mp4"`` value found in the page and
        the text of the first ``<h1 class="video-tt">`` element with the
        characters ``" / \\ : * ? < > |`` removed.

    Raises:
        IndexError: If the page contains no video address or no title. The
            exception type matches what indexing an empty match list raised
            before, now with a message naming the page.
    """
    response = requests.get(url=url, headers=header, timeout=timeout).text
    # The dot before "mp4" is escaped so it only matches a literal ".mp4".
    url_match = re.search(r'srcUrl="(.+?\.mp4)"', response)
    name_match = re.search(r'<h1 class="video-tt">(.+?)</h1>', response)
    if url_match is None or name_match is None:
        raise IndexError(
            'video address or title not found in page: {}'.format(url))
    # Strip every forbidden character in a single pass.
    forbidden = str.maketrans('', '', '"/\\:*?<>|')
    video_name = name_match.group(1).translate(forbidden)
    return url_match.group(1), video_name
# Page through the listing and download every video to local disk
def run(save_dir=r'D:\code\梨视频\video'):
    """Crawl the configured category page by page and download its videos.

    The listing address is built from ``start_url`` by substituting
    ``category`` for ``$`` and the current offset for ``#``; the offset grows
    by 12 per page. The loop ends when a page yields no video links.

    Args:
        save_dir: Directory the ``.mp4`` files are written to. It is created
            if missing. Defaults to the previously hard-coded location.
    """
    # urlretrieve cannot create missing parent directories itself.
    os.makedirs(save_dir, exist_ok=True)
    num = 0
    while True:
        req_url = start_url.replace('$', category).replace('#', str(num))
        print('当前请求地址{}'.format(req_url))
        video_id_list = get_video_id(req_url)
        if not video_id_list:
            print('当前分类数据爬取完毕!')
            break
        for video in video_id_list:
            url = 'https://www.pearvideo.com/' + video
            try:
                video_url, video_name = get_video_url(url)
            except (IndexError, OSError) as err:
                # One unparsable or unreachable page should not end the crawl.
                print('视频页解析失败,已跳过:{} ({})'.format(url, err))
                continue
            path = os.path.join(save_dir, '{}.mp4'.format(video_name))
            if os.path.exists(path):
                continue
            # Download to a temporary name first, so an interrupted transfer
            # never leaves a truncated file that later looks already done.
            tmp_path = path + '.part'
            try:
                urlretrieve(video_url, tmp_path, Schedule)
            except OSError as err:
                print('下载失败,已跳过:{} ({})'.format(video_url, err))
                continue
            os.replace(tmp_path, path)
        print('下拉继续获取。。。')
        num += 12
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。