# NOTE(review): the three lines that were here were Gitee web-UI dialog text
# ("code pull complete, the page will refresh" / forced-sync warning) that was
# accidentally saved into the source file. They are not Python and broke the
# module at import time, so they have been reduced to this comment.
from spider import Spider
import threading,_thread
from collections import deque
import glob
'''
url = 'https://www.x23us.com/html/66/66656/'
book_author_regex = '<meta name="og:novel:author" content="(.*?)"/> '
book_name_regex = '<meta name="og:novel:book_name" content="(.*?)"/>'
book_chapter_regex = '<td class="L"><a href="(.*?).html">(.*?)</a></td>'
x = Spider(url).get_info(book_author = book_author_regex,
book_name = book_name_regex,
book_chapter=book_chapter_regex,
)
'''
class Duoxian(Spider):
    """Novel downloader built on Spider.

    Scrapes a chapter-index page, then downloads each chapter in turn and
    appends it to a text file named after the book and author.  Progress is
    persisted to 'save_index.txt' (number of chapters fully saved) so an
    interrupted run can resume where it left off.
    """

    def __init__(self,
                 url='https://www.x23us.com/html/66/66656/',
                 charset='gbk',
                 ):
        # BUG FIX: the original called super().__init__() with no arguments,
        # so the url/charset parameters were silently dropped; the commented
        # line `Spiedr.__init__(self,url,charset)` showed the intent.
        # NOTE(review): assumes Spider.__init__ accepts (url, charset) --
        # confirm against spider.py.
        super().__init__(url, charset)
        # Absolute index of the first chapter in the queue (non-zero after a
        # resume); book_info() updates it when save_index.txt exists.
        self._start_index = 0
        self.book_info_dict = self.book_info()
        self.book_name = self.book_info_dict['book_name'][0]
        self.book_author = self.book_info_dict['book_author'][0]
        # BUG FIX: the original stored this deque in self.book_info, which
        # shadowed the book_info() method as soon as __init__ returned.
        self.chapter_queue = deque(enumerate(self.book_info_dict['book_info']))

    def book_info(self):
        """Scrape book metadata and the (possibly resumed) chapter list.

        Returns the dict produced by Spider.get_info with keys 'book_info'
        (list of (href, title) regex captures), 'book_name' and
        'book_author'.  If 'save_index.txt' exists, its integer content is
        taken as the number of chapters already saved and that prefix of
        'book_info' is skipped.
        """
        book_info_regex = '<td class="L"><a href="(.*?)">(.*?)</a></td>'
        book_name_regex = '<meta property="og:title" content="(.*?)"/>'
        book_author_regex = '<meta name="og:novel:author" content="(.*?)"/>'
        # The original duplicated this identical get_info call in both
        # branches of the resume check; fetch once, then slice if resuming.
        info_dict = self.get_info(book_info=book_info_regex,
                                  book_name=book_name_regex,
                                  book_author=book_author_regex,
                                  )
        if glob.glob('save_index.txt'):
            with open('save_index.txt', 'r') as ff:
                # Tolerate an empty file (e.g. a crash mid-write).
                site = int(ff.read() or 0)
            print(site)
            self._start_index = site
            info_dict['book_info'] = info_dict['book_info'][site:]
        return info_dict

    def chapter_info(self, chapter_url):
        """Fetch one chapter page; returns {'chapter_content': [raw html]}."""
        chapter_content_regex = '<dd id="contents">(.*?)</dd>'
        return Spider(chapter_url).get_info(
            chapter_content=chapter_content_regex)

    def book_save(self):
        """Drain the chapter queue, appending each chapter to the book file.

        deque.popleft() is atomic, so when several threads run this method
        (via my_thread) no chapter is processed twice; the IndexError raised
        on an empty queue is the termination signal.  (The original looped
        `while True` and crashed with IndexError once the queue emptied.)
        NOTE(review): concurrent appends to the output file may still
        interleave chapters out of order -- same as the original.

        BUG FIX: progress is written to 'save_index.txt' only AFTER a
        chapter is safely on disk, so a crash re-downloads the in-flight
        chapter instead of duplicating or skipping one.  The original
        updated the index before downloading, which duplicated the current
        chapter on a fresh-run crash and skipped one on a resumed-run crash.
        """
        out_name = '书名:{}--作者:{}.txt'.format(self.book_name,
                                             self.book_author)
        while True:
            try:
                index, (url_id, chapter_name) = self.chapter_queue.popleft()
            except IndexError:
                break  # queue drained -- all chapters handled
            print(index)
            print(chapter_name)
            chapter_url = self.url + url_id
            content = self.chapter_info(chapter_url)['chapter_content'][0]
            with open(out_name, 'a') as f:
                f.write(chapter_name)
                f.write('\n\n')
                f.write(content)
                f.write('\n\n')
            # Persist "chapters completed" as an absolute count so a resumed
            # run slices the index list correctly in book_info().
            with open('save_index.txt', 'w') as f:
                f.write(str(self._start_index + index + 1))

    def my_thread(self, workers=1):
        """Run book_save() on `workers` threads.

        `workers` defaults to 1, preserving the original hard-coded
        `range(1)` behaviour while allowing real fan-out.
        """
        pool = [threading.Thread(target=self.book_save)
                for _ in range(workers)]
        for t in pool:
            t.start()
if __name__ == '__main__':
    # Single-threaded run; call my_thread() instead for the threaded variant.
    downloader = Duoxian()
    downloader.book_save()
# NOTE(review): the two lines that were here were Gitee content-moderation
# footer text ("content may be unsuitable for display... you may appeal")
# accidentally saved into the source. They are not Python and broke the
# module, so they have been reduced to this comment.