1 Star 0 Fork 0

Chengzi/video-scrawler

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
555电影爬虫.py 2.90 KB
一键复制 编辑 原始数据 按行查看 历史
Chengzi 提交于 2021-05-27 17:08 . first commit
import codecs
import json
import requests
import re
import threadpool
import ip
import jiexi
from fake_useragent import UserAgent
import csv
import pandas as pd
import numpy as np
import sys
# --- Shared module-level state used by the crawler functions in this file ---

# fake_useragent helper instance.
# NOTE(review): not referenced by the code visible in this file -- confirm.
ua = UserAgent()

# One HTTP session object reused for every request.
session = requests.session()

# Browser-like request headers.
# NOTE(review): the request calls visible in this file pass headers=None,
# so this dict appears to be unused here -- confirm before removing.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 "
        "Safari/537.36 SLBrowser/7.0.0.1071 SLBChan/30 "
    ),
}
video_headers = {"Connection": "close"}

# Proxy mapping handed to requests via ``proxies=``; only an "http" entry
# is configured.
proxy = {"http": "http://117.41.186.201:888"}
print(proxy)

# Accumulates one tuple per crawled video; the __main__ block writes it to CSV.
rows = []
def get_video(t, pg):
    """Fetch one page of video details for a category and append it to ``rows``.

    Requests the ``ac=detail`` listing for type id ``t`` and page ``pg``
    through the shared ``session`` and ``proxy``, prints the page number
    reported in the response, then appends one tuple per entry of the
    response's ``list`` to the module-level ``rows`` list and prints it.

    A request failure (timeout, proxy/connection error) or a body that is not
    valid JSON is reported and the page is skipped, so a single bad page does
    not abort the remaining pages of the caller's loop.

    Args:
        t: Category (type) id substituted into the ``t`` query parameter.
        pg: Page number substituted into the ``pg`` query parameter.

    Raises:
        KeyError: If the payload lacks ``page``/``list`` or an entry lacks one
            of the expected ``vod_*``/``type_id`` keys.
    """
    url = "https://www.o8tv.com/api.php/provide/vod/?ac=detail&t={}&pg={}".format(t, pg)
    try:
        # No per-request headers are passed (headers=None), as in the original call.
        d = session.get(url, headers=None, proxies=proxy, timeout=5)
        d_js = d.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers JSON decoding errors on requests versions where the
        # decode error is not a RequestException subclass.
        print("get_video failed: t={} pg={}: {}".format(t, pg, err))
        return

    print(d_js["page"], "页")

    # Column order must match the CSV header written by the __main__ block.
    fields = (
        "vod_id", "type_id", "vod_name", "vod_class", "vod_pic",
        "vod_actor", "vod_blurb", "vod_remarks",
        "vod_area", "vod_score_all", "vod_play_url",
    )
    for single in d_js["list"]:
        singlerow = tuple(single[name] for name in fields)
        rows.append(singlerow)
        print(singlerow)
def run(i):
    """Crawl every page of one category.

    Requests page 1 of category ``i`` to read the ``pagecount`` field, then
    calls ``get_video(i, pg)`` for each page from 1 to ``pagecount`` inclusive
    (page 1 is therefore requested again by ``get_video``). Pages are fetched
    sequentially within a category.

    Args:
        i: Category (type) id substituted into the ``t`` query parameter.

    Raises:
        KeyError: If a 200 response's JSON body has no ``pagecount`` key.
    """
    r = session.get(
        "https://www.o8tv.com/api.php/provide/vod/?ac=detail&t={}&pg=1".format(i),
        headers=None,
        proxies=proxy,
        timeout=5,
    )
    # Check the status BEFORE parsing the body: previously the JSON was parsed
    # first, so a non-200 response with a non-JSON body raised instead of
    # reaching this error branch.
    if r.status_code != 200:
        print("errror")
        return

    pagecount = r.json()["pagecount"]
    for pg in range(1, pagecount + 1):
        get_video(i, pg)
    print("电影结束")
if __name__ == '__main__':
    # Category (type) ids to crawl; each id becomes one run() task.
    type_ids = [1, 2, 3, 4, 13, 15, 44, 45, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
                90, 91, 92, 94, 96, 97, 98, 106, 108, 109, 110, 111, 112]

    # Thread pool created with 3 workers.
    pool = threadpool.ThreadPool(3)
    # makeRequests builds one work request per id: the first argument is the
    # worker function, the second is the list of arguments.
    tasks = threadpool.makeRequests(run, type_ids)
    # Queue every request and let the pool schedule them.
    for task in tasks:
        pool.putRequest(task)
    pool.wait()  # block until all queued tasks have finished

    # CSV header; the order matches the tuples that get_video appends to rows.
    headers22 = ['vod_id', 'type_id', 'vod_name', 'vod_class', 'vod_pic', 'vod_actor', 'vod_blurb',
                 'vod_remarks', 'vod_area', 'vod_score_all', 'vod_play_url']

    # newline="" lets the csv module control line endings; the context manager
    # closes the file even if writing a row raises.
    with open("chengzisee.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(headers22)
        # rows holds every record collected by the worker threads.
        writer.writerows(rows)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/chengzisee/video-scrawler.git
git@gitee.com:chengzisee/video-scrawler.git
chengzisee
video-scrawler
video-scrawler
master

搜索帮助