1 Star 1 Fork 1

李赟辉/梨视频短视频抓取

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
video_downloads.py 6.14 KB
一键复制 编辑 原始数据 按行查看 历史
17611666527 提交于 2020-05-05 22:36 . 梨视频短视频抓取
import re
import os
import requests
import tkinter
from tkinter import ttk
from urllib.request import urlretrieve
# Listing endpoint template. run() substitutes '$' with the category id and
# '#' with the paging offset before requesting it.
start_url = 'https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=$&start=#'
# HTTP headers passed to every requests.get() call in this file.
# NOTE(review): the Access-Control-* and Content-Encoding entries are normally
# response headers; presumably they were copied from browser dev tools --
# confirm whether the server actually needs them.
# NOTE(review): the Cookie value looks like a captured browser session and
# appears to contain account identifiers -- confirm it is required and that it
# is acceptable to keep it in source control.
header = {
'Access-Control-Allow-Credentials': 'true',
'Access-Control-Allow-Methods': 'GET,HEAD,PUT,POST,DELETE',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Encoding': 'gzip',
'Content-Type': 'application/json;charset=UTF-8',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
# The cookie is one string built from adjacent literals (implicit concatenation).
'Cookie': '__mta=251081401.1572934390047.1574775365015.1574775365018.30; '
'_lxsdk_cuid=16b88512b3fc8-057b10a5bdb7e7-e343166-15f900-16b88512b3fc8; '
'_lxsdk=16b88512b3fc8-057b10a5bdb7e7-e343166-15f900-16b88512b3fc8; '
'_hc.v=183cbc21-7956-a2b2-824f-a4c810ea98a1.1561360150; s_ViewType=10; aburl=1; '
'_dp.ac.v=d20caa18-0d30-4a21-8641-bc4a673ddb06; '
'ctu=02ae32092e43de1eab6ed9dfdc3a765f976569575ed66b947d717def70ba6012; '
'ua=%E6%97%B1%E5%9C%B0%E6%9C%89%E6%A0%B9%E8%83%A1%E8%90%9D%E5%8D%9C; '
'uudid=cms41448392-3c4a-b185-3248-267203d258ea; __utma=1.1256113168.1575279313.1575279313.1575279313.1; '
'__utmz=1.1575279313.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); '
'Hm_lvt_dbeeb675516927da776beeb1d9802bd4=1574775348,1574910357,1575366638,1575451098; '
'radius=223.192.87.133; cy=219; cye=dongguan; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; '
'dper=36092ccdaea90c4337e1ad6f901f3bb9f74a8aee3a0d912b18047b51352d0730ccc138dbb340ebe57e1eb655c3a3e9af7577e979acc5478cc386c427747717a3c465e1bfee27aa6a941a09615b3b2f1210263ae7388116e462be5767885e7d03; ll=7fd06e815b796be3df069dec7836c3df; uamo=17611666527; _lxsdk_s=16efcbceecc-e9-ec4-dbb%7C%7C689 '
}
def Schedule(blocknum, blocksize, totalsize):
    """Report download progress; passed to ``urlretrieve`` as its report hook.

    Args:
        blocknum: Number of data blocks downloaded so far.
        blocksize: Size of one data block.
        totalsize: Size of the remote file. ``urlretrieve`` may pass a
            non-positive value when the size is unknown or the file is empty.

    Side effects:
        Prints the percentage and appends it to the module-level ``text``
        widget.
    """
    if totalsize > 0:
        per = 100.0 * blocknum * blocksize / totalsize
        # The last block is usually only partly filled, so the product can
        # overshoot the real size.
        if per > 100:
            per = 100
    else:
        # Unknown or zero size: dividing would raise ZeroDivisionError or
        # produce a negative percentage, so report 0 instead.
        per = 0
    print(per)
    text.insert(tkinter.END, '当前下载进度:%d'%per)
# Collect the video page links contained in one listing page.
def get_video_id(url):
    """Return the relative video page links found at *url*.

    Args:
        url: Listing URL to request.

    Returns:
        A list of the ``video_...`` href values matched in the response text,
        or ``None`` when the page contains no such link.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout a stalled connection would block the caller forever.
    resp = requests.get(url=url, headers=header, timeout=10).text
    res = re.compile(r'<a href="(video_.+?)" class="vervideo-lilink actplay">')
    # An empty result list is reported as None.
    return res.findall(resp) or None
# Resolve the media URL and a file-system-safe title for one video page.
def get_video_url(url):
    """Return ``(video_url, video_name)`` scraped from the video page at *url*.

    Args:
        url: Address of a single video page.

    Returns:
        A tuple of the first ``srcUrl="....mp4"`` value found in the page and
        the text of the first ``<h1 class="video-tt">`` element with the
        characters ``" / \\ : * ? < > |`` removed.

    Raises:
        IndexError: If the page contains no matching source URL or title.
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url=url, headers=header, timeout=10).text
    # The dot before "mp4" is escaped so that it only matches a literal '.'.
    url_match = re.search(r'srcUrl="(.+?\.mp4)"', response)
    name_match = re.search('<h1 class="video-tt">(.+?)</h1>', response)
    if url_match is None or name_match is None:
        # Same exception type the former ``findall(...)[0]`` produced, but
        # with a message that says which page could not be parsed.
        raise IndexError('no video source or title found on page: {}'.format(url))
    # Remove every forbidden character in a single pass.
    forbidden = str.maketrans('', '', '"/\\:*?<>|')
    video_name = name_match.group(1).translate(forbidden)
    return url_match.group(1), video_name
# Page through the selected category and download each video to ./video.
def run():
    """Download every video of the category currently selected in the GUI.

    Requests listing pages built from ``start_url`` with an increasing offset
    until ``get_video_id`` reports no more entries. Each video not already
    present as ``./video/<name>.mp4`` is downloaded with ``urlretrieve``,
    using ``Schedule`` as the progress hook. Status messages are appended to
    the module-level ``text`` widget or printed.

    NOTE(review): this is wired as a button command and runs synchronously;
    presumably the window cannot repaint or react to the stop button until it
    returns -- confirm and consider moving the work off the GUI callback.
    """
    text.insert(tkinter.END, '撒嘛')
    category = get_category()
    if category is None:
        # get_category() returns None when the combobox content is not one
        # of the known labels; str.replace() below would raise TypeError.
        text.insert(tkinter.END, '未知分类,请从下拉列表中选择')
        return
    # Create the target directory once instead of once per video.
    os.makedirs('video', exist_ok=True)
    num = 0
    while True:
        req_url = start_url.replace('$', category).replace('#', str(num))
        text.insert(tkinter.END,'正在获取数据,请稍等。。。')
        video_id_list = get_video_id(req_url)
        if not video_id_list:
            print('当前分类数据爬取完毕!')
            break
        for video in video_id_list:
            url = 'https://www.pearvideo.com/' + video
            try:
                video_url, video_name = get_video_url(url)
            except IndexError:
                # The page had no parsable source URL or title; skip it
                # rather than aborting the whole crawl.
                print('skipped unparsable video page: {}'.format(url))
                continue
            path = './video/{}.mp4'.format(video_name)
            if os.path.exists(path):
                continue
            text.insert(tkinter.END, '视频{}开始下载...'.format(video_name))
            # Download to a temporary name and rename on success, so an
            # interrupted transfer never leaves a truncated .mp4 that a later
            # run would treat as already downloaded.
            part_path = path + '.part'
            urlretrieve(video_url, part_path, Schedule)
            os.replace(part_path, path)
            text.insert(tkinter.END, '视频{}下载完成...'.format(video_name))
        print('下拉继续获取。。。')
        # Advance the paging offset; presumably 12 is the page size of the
        # listing endpoint -- TODO confirm.
        num += 12
def get_category(*args):  # event handler; *args absorbs whatever the caller passes
    """Map the combobox selection to its category id string.

    Appends a fixed message to the module-level ``text`` widget, then reads
    the current content of ``cmb``.

    Returns:
        The category id as a string, or ``None`` when the combobox content is
        not one of the known labels.
    """
    text.insert(tkinter.END, '去他妈的视频爬取')
    # Display label -> category id used in the listing URL.
    category_ids = {
        '社会': '1',
        '世界': '2',
        '财富': '3',
        '娱乐': '4',
        '生活': '5',
        '汽车': '31',
        '美食': '6',
        '科技': '8',
        '体育': '9',
        '音乐': '59',
        '旗帜': '8889',
    }
    selected = cmb.get()
    return category_ids.get(selected)
# Graphical user interface
# Font shared by every widget in the window.
_FONT = ('楷体', 18)
root = tkinter.Tk()
# Window title
root.title('梨视频短视频下载器')
# Window size
root.geometry('465x400')
# Caption in front of the category selector
label = tkinter.Label(root, text='请选择分类: ', font=_FONT)
# Place the caption with the default grid position
label.grid()
# Text box that shows status messages
# ("height" is spelled out; the former "heigh" only worked as a Tk option
# abbreviation.)
text = tkinter.Text(root, font=_FONT, width=38, height=12)
text.grid(row=1, columnspan=2)
# Drop-down list of categories
cmb = ttk.Combobox(root, font=_FONT)
cmb.grid(row=0, column=1)
# Entries offered by the drop-down list ("values" is the full option name)
cmb['values'] = ('社会','世界','财富','娱乐','生活','汽车','美食','科技','体育','音乐','旗帜')
# Preselect the entry at index 1
cmb.current(1)
cmb.bind("<<ComboboxSelected>>", get_category)
# Download button
button_download = tkinter.Button(root, text='点击下载', font=_FONT, command=run)
button_download.grid(row=2, column=0, sticky=tkinter.W)  # sticky=W: align to the west edge
# Stop button: leaves the main loop
button_stop = tkinter.Button(root, text='停止下载', font=_FONT, command=root.quit)
button_stop.grid(row=2, column=1, sticky=tkinter.E)
# Show the window and start the event loop
root.mainloop()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/love_yun/pear_video.git
git@gitee.com:love_yun/pear_video.git
love_yun
pear_video
梨视频短视频抓取
master

搜索帮助