1 Star 0 Fork 11

WSS/北网-2分院人工智能-1804C-资管

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
多线下载.py 4.41 KB
一键复制 编辑 原始数据 按行查看 历史
zhangpengju 提交于 2018-10-12 08:32 . '小说下载框架及x23us实例'
from spider import Spider
import threading,_thread
from collections import deque
import glob
'''
url = 'https://www.x23us.com/html/66/66656/'
book_author_regex = '<meta name="og:novel:author" content="(.*?)"/> '
book_name_regex = '<meta name="og:novel:book_name" content="(.*?)"/>'
book_chapter_regex = '<td class="L"><a href="(.*?).html">(.*?)</a></td>'
x = Spider(url).get_info(book_author = book_author_regex,
book_name = book_name_regex,
book_chapter=book_chapter_regex,
)
'''
class Duoxian(Spider):
    """Download all chapters of one x23us novel into a single text file.

    The chapter list, book title and author are scraped from the index page
    at ``url`` through the inherited ``get_info``.  Progress is persisted in
    ``save_index.txt`` (the number of chapters already written) so that an
    interrupted run can pick up where it stopped.

    NOTE(review): ``get_info`` is assumed to return a dict mapping each
    keyword name to the list of regex matches (tuples for multi-group
    patterns), as implied by how the results are indexed below -- confirm
    against ``Spider``.
    """

    # Progress file: holds the count of chapters already appended to the book.
    _SAVE_INDEX_FILE = 'save_index.txt'

    def __init__(self,
                 url='https://www.x23us.com/html/66/66656/',
                 charset='gbk',
                 ):
        """Scrape the book index page and queue the chapters still to fetch.

        Args:
            url: Index page of the novel; chapter hrefs are appended to
                ``self.url`` to build chapter URLs.
            charset: Page encoding handed to ``Spider``.
        """
        # Forward the arguments: the previous bare ``super().__init__()``
        # silently ignored both parameters.
        # NOTE(review): assumes Spider.__init__ accepts (url, charset), as the
        # earlier commented-out call ``Spider.__init__(self, url, charset)``
        # indicated -- confirm against spider.py.
        super().__init__(url, charset)
        self.book_info_dict = self.book_info()
        self.book_name = self.book_info_dict['book_name'][0]
        self.book_author = self.book_info_dict['book_author'][0]
        # From here on ``self.book_info`` is the work queue and shadows the
        # ``book_info`` method on this instance.  Entries are
        # ``(absolute_chapter_index, match)`` so that the progress record
        # stays correct after a resume.
        self.book_info = deque(
            enumerate(self.book_info_dict['book_info'],
                      start=self._resume_index))

    def _read_save_index(self):
        """Return the number of chapters already saved (0 if none recorded).

        A missing or empty progress file counts as "nothing saved yet".
        Non-numeric content still raises ``ValueError`` rather than being
        silently ignored.
        """
        try:
            with open(self._SAVE_INDEX_FILE, 'r') as f:
                text = f.read().strip()
        except FileNotFoundError:
            return 0
        # An empty file can be left behind if a run dies mid-write.
        return max(0, int(text)) if text else 0

    def _write_save_index(self, count):
        """Record that ``count`` chapters have been written to the book file."""
        with open(self._SAVE_INDEX_FILE, 'w') as f:
            f.write(str(count))

    def book_info(self):
        """Scrape title, author and the list of chapters still to download.

        Returns:
            The dict produced by ``get_info`` with keys ``'book_info'``,
            ``'book_name'`` and ``'book_author'``; ``'book_info'`` has the
            chapters already recorded in the progress file sliced off.
        """
        book_info_regex = '<td class="L"><a href="(.*?)">(.*?)</a></td>'
        book_name_regex = '<meta property="og:title" content="(.*?)"/>'
        book_author_regex = '<meta name="og:novel:author" content="(.*?)"/>'
        info_dict = self.get_info(book_info=book_info_regex,
                                  book_name=book_name_regex,
                                  book_author=book_author_regex,
                                  )
        # Remembered so __init__ can number the remaining chapters with their
        # absolute positions.
        self._resume_index = self._read_save_index()
        if self._resume_index:
            print(self._resume_index)
            info_dict['book_info'] = info_dict['book_info'][self._resume_index:]
        return info_dict

    def chapter_info(self, chapter_url):
        """Fetch one chapter page and extract its body.

        Returns:
            The ``get_info`` result of a fresh ``Spider`` for ``chapter_url``,
            with the chapter text under ``'chapter_content'``.
        """
        chapter_content_regex = '<dd id="contents">(.*?)</dd>'
        return Spider(chapter_url).get_info(
            chapter_content=chapter_content_regex)

    def book_save(self):
        """Download queued chapters in order and append them to the book file.

        Returns normally once the queue is empty.  The progress file is
        updated only after a chapter has been written, so a failure while
        fetching a chapter leaves it to be retried on the next run.

        Raises:
            IndexError: If a chapter page yields no content match.
        """
        book_path = '书名:{}--作者:{}.txt'.format(self.book_name,
                                               self.book_author)
        while True:
            try:
                info = self.book_info.popleft()
            except IndexError:
                # Queue drained: every chapter has been handled.
                return
            chapter_index = info[0]
            print(chapter_index)
            chapter_url = self.url + info[1][0]
            chapter_name = info[1][1]
            print(chapter_name)
            content = self.chapter_info(chapter_url)['chapter_content'][0]
            with open(book_path, 'a') as f:
                f.write(chapter_name)
                f.write('\n\n')
                f.write(content)
                f.write('\n\n')
            # Chapters 0..chapter_index are now on disk.
            # NOTE: with more than one worker thread chapters may finish out
            # of order, so this record is only reliable for a single worker.
            self._write_save_index(chapter_index + 1)

    def my_thread(self):
        """Start ``book_save`` on worker threads (currently exactly one).

        The threads are started but not joined; the method returns
        immediately.
        """
        pool = [threading.Thread(target=self.book_save) for _ in range(1)]
        for worker in pool:
            worker.start()
if __name__ == '__main__':
    # Script entry point: build the downloader with its default URL/charset
    # and save the chapters sequentially in the main thread.
    downloader = Duoxian()
    downloader.book_save()
    # Threaded alternative kept for reference: Duoxian().my_thread()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/WANGSUHAIS/BeiWang-2FenYuanRenGongZhiNen-1804C-u.git
git@gitee.com:WANGSUHAIS/BeiWang-2FenYuanRenGongZhiNen-1804C-u.git
WANGSUHAIS
BeiWang-2FenYuanRenGongZhiNen-1804C-u
北网-2分院人工智能-1804C-资管
master

搜索帮助