1 Star 3 Fork 0

萧石/public_data

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
独步小说网_白银时代.py 2.12 KB
一键复制 编辑 原始数据 按行查看 历史
萧石 提交于 2024-11-21 09:51 . 独步小说网
import requests
# Proxy configuration: plain-HTTP and HTTPS requests both go through the
# same local proxy endpoint.
proxies = {
    scheme: 'http://127.0.0.1:15732'
    for scheme in ('http', 'https')
}
def get_text(url="https://www.dbxsd.com/book/p7836/3031015.html", timeout=30):
    """Download one chapter page and return the text of its paragraphs.

    The page is requested through the module-level ``proxies`` with
    browser-like headers, decoded as UTF-8, and the text of every ``<p>``
    inside the ``<div id="cont-body">`` element is collected. The final URL,
    the response object and the extracted text are printed as progress
    output.

    Args:
        url: Address of the chapter page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled proxy or server would block the scraper
            indefinitely.

    Returns:
        The stripped text of each paragraph, each followed by a newline.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page has no ``cont-body`` container.
    """
    # Imported here, as in the original script, so the module itself can be
    # imported without BeautifulSoup being installed.
    from bs4 import BeautifulSoup

    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "priority": "u=0, i",
        "referer": "https://www.dbxsd.com/book/p7836/",
        "sec-ch-ua": "\"Chromium\";v=\"124\", \"Google Chrome\";v=\"124\", \"Not-A.Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Linux\"",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    }
    cookies = {
        "playcount": "15",
    }

    response = requests.get(
        url,
        headers=headers,
        cookies=cookies,
        proxies=proxies,
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    response.raise_for_status()
    response.encoding = "utf-8"
    print(response.url, response)

    soup = BeautifulSoup(response.text, "html.parser")
    container = soup.find('div', id="cont-body")
    if container is None:
        # find() returns None when the element is absent; report which page
        # was affected rather than failing on a None attribute lookup.
        raise ValueError(f"no <div id='cont-body'> found in {response.url}")

    text = ''.join(
        paragraph.text.strip() + '\n'
        for paragraph in container.find_all('p')
    )
    print(text)
    return text
def main():
    """Fetch every page of each listed chapter and append it to one file.

    Each key of ``url_dict`` is the first page of a chapter and its value is
    the number of pages in that chapter. Page 1 lives at the URL itself;
    page N (N >= 2) lives at the same URL with ``_N`` inserted before
    ``.html``. The text of every page, followed by three newlines, is
    appended to ``白银时代1.txt``.
    """
    # First-page URL of each chapter -> number of pages in that chapter.
    # The first entry previously pointed at the host "dbxsc.com"; every other
    # URL in this script (including the same chapter in get_text's default
    # and the referer header) uses "dbxsd.com", so the host is aligned here.
    url_dict = {
        'https://www.dbxsd.com/book/p7836/3031015.html': 16,
        'https://www.dbxsd.com/book/p7836/3031016.html': 27,
        'https://www.dbxsd.com/book/p7836/3031017.html': 15,
        'https://www.dbxsd.com/book/p7836/3031018.html': 26,
    }
    # Open the output once instead of re-opening it for every page; the
    # context manager still flushes whatever was written if a fetch fails.
    with open('白银时代1.txt', 'a', encoding='utf-8') as f:
        for url, lens in url_dict.items():
            for page in range(1, lens + 1):
                if page == 1:
                    page_url = url
                else:
                    page_url = url.replace('.html', f'_{page}.html')
                f.write(get_text(url=page_url) + '\n\n\n')
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/beihai_xiaoshi/public_data.git
git@gitee.com:beihai_xiaoshi/public_data.git
beihai_xiaoshi
public_data
public_data
master

搜索帮助