1 Star 0 Fork 0

sailist/豆瓣爬虫

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
get_tv.py 3.70 KB
一键复制 编辑 原始数据 按行查看 历史
from request_handle import *
import requests
import time
import re
class Eyelab:
    """Build and run leaderboard queries against the pro.eye.lab.kuyun.com API.

    Typical use: configure the instance with the ``set_*`` methods, then call
    :meth:`load` to fetch the leaderboard and get it back as a list of dicts.

    Two request types are supported (see :meth:`set_type`):

    * ``'pg'`` - program leaderboard, queried for a single date
      (:meth:`set_data`).
    * ``'tv'`` - the ``EYETvLeaderboardAction`` endpoint, queried for a date
      range (:meth:`set_range`).
    """

    # Category name -> numeric code.
    # NOTE(review): presumably these are the values accepted by the
    # ``ca_type`` query parameter - confirm against the API.
    ca_types = {'全部':0,'全部节目':-3,
                '生活服务':494,'青少':493,
                '专题':491,'音乐戏曲':13,
                '法制':9,'财经':7,
                '新闻':5,'电影':4,
                '综艺':2,'体育':1,
                '其他':492,'电视剧':3}
    # Endpoint prefixes; the query string is appended directly after the '?'.
    tvhead = 'http://pro.eye.lab.kuyun.com/api/eye/EYETvLeaderboardAction?'
    pghead = 'http://pro.eye.lab.kuyun.com/api/eye/EYEEpgLeaderboardAction?'
    # Class-level defaults; the setters below override them per instance.
    request_type = 'pg'
    data = None
    ca_type = 0
    datarange = None
    timerange = '0000_2400'
    # Seconds to wait for the server before giving up, so that a stalled
    # connection cannot hang the caller forever.
    request_timeout = 30
    # Splits the bracketed response body into one fragment per record by
    # cutting on the braces (and on "}<one char>{" between records).
    _record_sep = re.compile(r"\}[^\}]{|\{|\}")

    def __init__(self):
        self.inirequest = "http://pro.eye.lab.kuyun.com/api/eye/EYEEpgLeaderboardAction?"

    def set_data(self, year, mouth, day):
        """Set the query date used by ``'pg'`` requests.

        The date is stored in ``self.data`` as ``YYYY-MM-DD`` (month and day
        zero-padded to two digits).

        Args:
            year: Year as an int.
            mouth: Month as an int (parameter name kept for compatibility).
            day: Day of month as an int.
        """
        self.data = "{}-{:0>2d}-{:0>2d}".format(year, mouth, day)

    @staticmethod
    def _format_date(parts):
        """Join integer date parts with '-', zero-padding each to two digits."""
        return '-'.join('{:0>2d}'.format(part) for part in parts)

    def set_range(self, startdate, enddate=None):
        """Set the date range used by ``'tv'`` requests.

        Args:
            startdate: ``[year, month, day]`` sequence of ints.
            enddate: ``[year, month, day]`` sequence of ints. When omitted,
                the calendar day after ``startdate`` is used.

        Raises:
            ValueError: If ``enddate`` is omitted and ``startdate`` is not a
                valid calendar date.
        """
        if enddate is None:
            # Local import keeps this block self-contained.
            import datetime
            # Real calendar arithmetic: a plain "day + 1" yields impossible
            # dates such as the 32nd at the end of a month.
            next_day = (datetime.date(startdate[0], startdate[1], startdate[2])
                        + datetime.timedelta(days=1))
            enddate = [next_day.year, next_day.month, next_day.day]
        self.datarange = [self._format_date(startdate),
                          self._format_date(enddate)]

    def set_time(self, timerange):
        """Set the ``time_range`` query value (default ``'0000_2400'``)."""
        self.timerange = timerange

    def set_type(self, request_type='pg'):
        """Select which endpoint :meth:`load` queries.

        ``'pg'`` uses the date from :meth:`set_data`;
        ``'tv'`` uses the date range from :meth:`set_range`.
        """
        self.request_type = request_type

    def set_catype(self, ca_type):
        """Set the ``ca_type`` query value (only sent with ``'pg'`` requests)."""
        self.ca_type = ca_type

    def check(self):
        """Validate that the instance is configured for its request type.

        Raises:
            RuntimeError: If ``request_type`` is not ``'pg'`` or ``'tv'``, or
                if the date / date range that type needs has not been set.
        """
        if self.request_type == 'pg':
            if self.data is None:
                raise RuntimeError("request_type = pg but no data")
        elif self.request_type == 'tv':
            if self.datarange is None:
                raise RuntimeError("request_type = tv but no datarange")
        else:
            # Without this, load() would fail later with an unbound local.
            raise RuntimeError(
                "unknown request_type: {!r}".format(self.request_type))

    def _build_url(self):
        """Return the full request URL for the current, already checked, settings."""
        if self.request_type == 'pg':  # program leaderboard
            return self.pghead + ("time_range={}&tv_type=0"
                                  "&ca_type={}"
                                  "&date={}+00%3A00%3A00&uid=&_=1521967401603"
                                  ).format(self.timerange, self.ca_type, self.data)
        # 'tv': note that ca_type is not part of this query.
        return self.tvhead + ('tv_type=0'
                              '&time_range={}'
                              '&start={}+00%3A00%3A00'
                              '&end={}'
                              '+00%3A00%3A00&uid=&_=1521976832129'
                              ).format(self.timerange, self.datarange[0], self.datarange[1])

    def load(self):
        """Fetch the configured leaderboard and return it as a list of dicts.

        Returns:
            list[dict[str, str]]: One dict per record, as produced by
            :meth:`get_recorddict`.

        Raises:
            RuntimeError: If the configuration is incomplete (see
                :meth:`check`) or the server answers with a status other
                than 200.
        """
        self.check()
        command = self._build_url()
        response = requests.get(command, timeout=self.request_timeout)
        if response.status_code != 200:
            raise RuntimeError('status_code = {} with command: ||{}||'.format(response.status_code, command))
        # get_subcontent comes from request_handle.
        # NOTE(review): presumably it returns the text between '[' and ']'
        # in the response body - confirm against that module.
        content = get_subcontent(response.text, '[', ']', True)
        fragments = [piece for piece in self._record_sep.split(content) if piece]
        return self.get_recorddict(fragments)

    def get_recorddict(self, lists):
        """Turn raw record fragments into dictionaries.

        Each fragment is split on ',' into ``key:value`` fields; double
        quotes are stripped from both key and value. Fields without a ':'
        are skipped, as are fragments that yield no fields at all.

        Args:
            lists: Iterable of strings, one per record.

        Returns:
            list[dict[str, str]]: The parsed records, in input order.
        """
        whole_dict = []
        for record_text in lists:  # all the fields of one program
            record_dict = dict()
            for field in record_text.split(','):  # a single field of that program
                # Cut on the first ':' only, so values that themselves
                # contain colons (e.g. "19:30:00") are kept whole.
                key, sep, value = field.partition(':')
                if not sep:
                    continue
                record_dict[key.replace('"', '')] = value.replace('"', '')
            if record_dict:
                whole_dict.append(record_dict)
        return whole_dict
# Demo driver: fetch the program ('pg') leaderboard for 2018-03-25 and dump
# every record to stdout. Note that this runs whenever the module is loaded.
e = Eyelab()
e.set_type()
e.set_data(2018, 3, 25)
t = e.load()
for record in t:
    # One "key value" line per field, then a separator after each record.
    for field_name, field_value in record.items():
        print(field_name, field_value)
    print("=========")
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/sailist/watercress_reptile.git
git@gitee.com:sailist/watercress_reptile.git
sailist
watercress_reptile
豆瓣爬虫
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385