1 Star 0 Fork 0

xyislove/python-test

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
豆瓣电影分类爬取requests版-初级.py 3.14 KB
一键复制 编辑 原始数据 按行查看 历史
xyislove 提交于 2022-01-03 23:16 . 2022.1.3
from http.cookiejar import Cookie
import requests
import re
from fake_useragent import UserAgent
import csv
import codecs # 修正写入csv乱码问题
import os
import random
import json
import pprint
def re_get(url, headers, cookie, timeout=10):
    """Fetch *url* with a GET request and return the closed response.

    Args:
        url: Address to request.
        headers: Mapping of HTTP request headers.
        cookie: Value forwarded to ``requests.get(cookies=...)``; may be
            ``None`` when the cookies are already carried in *headers*.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The ``requests.Response``. The body is already downloaded (the
        request is not streamed), so it is still readable after ``close()``.
    """
    resp = requests.get(url=url, headers=headers, cookies=cookie, timeout=timeout)
    # Release the connection back to the pool; the content stays cached.
    resp.close()
    return resp
def write_txt(resp, i):
    """Dump the JSON payload of *resp* to a per-page text file.

    Args:
        resp: Response-like object exposing a ``json()`` method.
        i: Page offset; it is embedded in the output file name.

    The file is written to ``./豆瓣电影相关/动画分类排行{i}_json.txt``. The
    target directory is created on demand so a fresh checkout does not fail
    with ``FileNotFoundError``.
    """
    # Decode first so a non-JSON response does not leave an empty file behind.
    data = resp.json()
    path = f'./豆瓣电影相关/动画分类排行{i}_json.txt'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, mode='w', encoding='utf-8') as f:
        json.dump(data, f)
if __name__ == '__main__':
    # Script entry point: download the Douban chart pages (type=25) in steps
    # of 20 entries and save each page's JSON to its own file.
    ua = UserAgent()
    # Raw ``Cookie`` request-header value copied from a logged-in browser.
    # NOTE(review): this is a session credential committed to source control;
    # it should be loaded from an environment variable or an ignored file.
    raw_cookie = 'bid=A9x7UhKR-kQ; douban-fav-remind=1; __gads=ID=a8b2eead276238ae-2292ce7449cc0075:T=1634119699:RT=1634119699:S=ALNI_MYLUtx9tRos0IJTohhMeYy-4tYzlQ; ll="118395"; _vwo_uuid_v2=D9358C018C053888C5BFF75014EF814DE|6e73b2569811eb5db049e650e44d3396; ap_v=0,6.0; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1641191010%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DPyyu4gGTjKfeTGDuOyIpXEESeU_AqbCiT4qasEZkXgtDFwuaahv3dc9D1ak4yMxN%26wd%3D%26eqid%3D93aa498b0075cfa80000000361d29659%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.726177789.1634119708.1641021977.1641191011.9; __utmz=30149280.1641191011.9.7.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=30149280; __utma=223695111.2017483695.1636534214.1641021977.1641191011.7; __utmb=223695111.0.10.1641191011; __utmc=223695111; __utmz=223695111.1641191011.7.5.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; push_noty_num=0; push_doumail_num=0; dbcl2="229186203:lAXosxyymmo"; ck=5q7k; __utmt=1; __utmv=30149280.22918; __utmb=30149280.6.10.1641191011; _ga=GA1.2.726177789.1634119708; _gid=GA1.2.496071198.1641191387; _pk_id.100001.4cf6=ec7bc008968bb104.1636534214.7.1641191391.1641023683.'
    # Send the copied string verbatim as the Cookie header. Wrapping it in
    # ``cookies={'Cookie': ...}`` would create ONE cookie literally named
    # "Cookie" and emit ``Cookie: Cookie=bid=...``, mangling the first entry.
    headers = {'User-Agent': ua.random, 'Cookie': raw_cookie}
    for i in range(0, 130, 20):
        url = f'https://movie.douban.com/j/chart/top_list?type=25&interval_id=100%3A90&action=&start={i}&limit=20'
        print(url)
        # Cookies travel in ``headers``; nothing extra goes through ``cookies=``.
        resp = re_get(url, headers, None)
        print(resp.status_code)
        # Stop with a clear HTTP error rather than a JSON decode failure when
        # the server rejects the request. re_get() already closed the response.
        resp.raise_for_status()
        write_txt(resp, i)
        print(f'前{i}完成......')
    print('全部完成!!!')
# 注:重点 即 当request获取的是json类型的数据 用.text方法获取的是str 用.json()方法获取的是python类型的数据 建议用json()
# print(type(inital_data))
# data = inital_data.text
# with open('./豆瓣_str.txt', mode='w', encoding='utf-8') as f:
# f.write(data)
# print(type(data))
# print(data)
# executable_data = inital_data.json()
# # 错误写法 注 重点 即 python类型的数据 只有str即只有字符串可以直接用write()函数写入
# with open('./豆瓣_py.txt', mode='w', encoding='utf-8') as f:
# f.write(executable_data)
# ###############################################################################3
# with open('./豆瓣_json.txt', mode='a', encoding='utf-8') as f:
# json.dump(executable_data, f)
# print(type(executable_data))
# pprint.pprint(executable_data)
# print(executable_data)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/xyislove/python-test.git
git@gitee.com:xyislove/python-test.git
xyislove
python-test
python-test
master

搜索帮助