1 Star 0 Fork 0

bluedream_pp/pythonFirstInHead

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
dianyingtiantang.py 3.72 KB
一键复制 编辑 原始数据 按行查看 历史
# -*- coding: utf-8 -*-
import requests
from requests.exceptions import RequestException
import urllib2
import httplib
import os
import re
import time
# Substrings looked for in a movie's summary text: page() keeps a movie that
# has no rating only when its summary contains at least one of these entries.
make_where = ['中国']
def getdownload(url):
    """Download a movie detail page and return its FTP link target(s).

    The page is requested with a browser-like User-Agent, decoded as GB2312
    (undecodable bytes are dropped) and re-encoded as UTF-8.  The text that
    follows ``ftp://`` in every ``<a href="ftp://...">`` tag is collected and
    concatenated, in page order, into one string.

    :param url: address of the detail page to download.
    :return: the concatenated link targets; ``''`` when the page contains no
        FTP link or when the download failed.  On a failed download the
        function also sleeps for 60 seconds before returning.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36'
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    try:
        # The 900 second limit is a socket timeout, so it is handed to
        # urlopen(); sending it as an HTTP header has no effect on blocking.
        response = urllib2.urlopen(request, timeout=900)
        try:
            raw = response.read()
        finally:
            # Always release the connection, even when read() fails.
            response.close()
    except (IOError, httplib.HTTPException):
        print(url + '下载页面发生了异常')
        # Back off before the caller moves on to the next request.
        time.sleep(60)
        return ''

    html = raw.decode('gb2312', 'ignore').encode('utf-8', 'ignore')
    ftp_link = re.compile('<a href="ftp://(.*?)">', re.S)
    return "".join(ftp_link.findall(html))
# print getdownload("http://www.ygdy8.net/html/gndy/dyzz/20171103/55422.html");
#
# url = 'http://www.ygdy8.net/html/gndy/dyzz/index.html' #这是电影天堂最新电影的网站
def page(url, i, out_dir='/Users/wangyifei/dianyingtiantang'):
    """Download one movie list page and save the selected movies to a file.

    The page is decoded as GB2312 and re-encoded as UTF-8, the ``tbspan``
    tables are extracted, and each table yields (detail link, title, summary)
    entries.  A movie is written to ``<out_dir>/movieList<i>.mv`` when

    * a rating of the form ``◎豆瓣评分 <n>/10`` is found in its title and
      ``<n>`` is greater than 8, or
    * no usable rating is found and its summary contains one of the
      substrings listed in the module-level ``make_where``.

    Only the detail page address is recorded as the download address; the
    FTP link itself is not fetched here.

    :param url: address of the list page.
    :param i: page number, used as the suffix of the output file name.
    :param out_dir: directory that receives the output file; it must already
        exist.  Defaults to the location the script has always written to.
    :return: ``''`` when the page could not be downloaded (after sleeping
        for 60 seconds); otherwise ``None``.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36'
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    try:
        # The 900 second limit is a socket timeout, so it is handed to
        # urlopen(); sending it as an HTTP header has no effect on blocking.
        response = urllib2.urlopen(request, timeout=900)
        try:
            raw = response.read()
        finally:
            # Always release the connection, even when read() fails.
            response.close()
    except (IOError, httplib.HTTPException) as e:
        print(url + '列表页面发生异常 ' + str(e))
        time.sleep(60)
        return ''

    conent = raw.decode('gb2312', 'ignore').encode('utf-8', 'ignore')

    # First isolate the tables that hold the movie entries, then pull the
    # individual fields out of that reduced markup.
    table_pattern = re.compile('<table width="100%" border="0" cellspacing="0" cellpadding="0"'
                               ' class="tbspan" style="margin-top:6px">.*?</table>', re.S)
    tables = ''.join(table_pattern.findall(conent))
    entry_pattern = re.compile('<a href="(.*?)" class="ulink">(.*?)</a>.*?<td colspan.*?>(.*?)</td>', re.S)
    news = entry_pattern.findall(tables)

    # Compiled once here instead of once per movie.
    # NOTE(review): the rating is searched for in the link text, not in the
    # summary -- confirm that this is where the site actually puts it.
    rating_pattern = re.compile('◎豆瓣评分 (.*?)/10', re.S)

    # Text file holding title, summary and detail page address of each
    # selected movie; the with-block closes it even if the loop fails.
    out_path = out_dir + '/movieList' + str(i) + '.mv'
    with open(out_path, 'w') as out:
        out.write('最新电影:\n\n')
        for link, title, summary in news:
            rating = None
            found = rating_pattern.findall(title)
            if found:
                try:
                    # Use the first match, and parse it as a float so that
                    # decimal ratings such as "8.5" are accepted.
                    rating = float(found[0])
                except ValueError:
                    # An unreadable rating is handled like a missing one.
                    rating = None

            if rating is None:
                wanted = any(place in summary for place in make_where)
            else:
                wanted = rating > 8

            if wanted:
                print('下载 ' + title)
                out.write('片名:' + title + '\n' + '简介:' + summary + '\n'
                          + '下载地址:' + 'http://www.dytt8.net' + link + '\n' + '\n')
# Crawl list pages 1..99.  Every page is fetched inside its own try block so
# that a failure on one page is reported, followed by a 60 second pause, and
# the crawl then carries on with the next page instead of stopping.
page_index = 1
for i in range(1, 100):
    # Remember the page currently being processed.
    page_index = i
    try:
        page("http://www.dytt8.net/html/gndy/dyzz/list_23_" + str(i) + ".html", i)
        print(i)
    except Exception as e:
        print("发生异常了 " + str(e))
        time.sleep(60)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/bluedream_pp/pythonFirstInHead.git
git@gitee.com:bluedream_pp/pythonFirstInHead.git
bluedream_pp
pythonFirstInHead
pythonFirstInHead
master

搜索帮助