1 Star 0 Fork 1

CarlosWukong/Spider_XHS

forked from liuxiangcen/Spider_XHS 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
home.py 3.50 KB
一键复制 编辑 原始数据 按行查看 历史
Your Name 提交于 2024-02-06 11:08 . a
import requests
from one import OneNote
from profile import Profile
from xhs_utils.xhs_util import get_headers, get_params, js, check_cookies
class Home:
    """Collect or save every note listed on a user's profile ("home") page.

    The note list is read page by page from the ``user_posted`` web API at
    ``self.more_url``. Every request carries the ``x-s`` / ``x-t`` headers
    returned by ``js.call('get_xs', ...)`` for that page's API path.
    """

    # Seconds to wait on the note-list API before abandoning a request, so a
    # stalled connection cannot hang the crawl forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, cookies=None):
        """Set up the crawler.

        Args:
            cookies: Cookie mapping sent with every request; it must contain
                the ``'a1'`` key used when computing the request headers.
                When ``None``, the result of ``check_cookies()`` is used.
        """
        self.cookies = check_cookies() if cookies is None else cookies
        self.more_url = 'https://edith.xiaohongshu.com/api/sns/web/v1/user_posted'
        self.profile = Profile(self.cookies)
        self.oneNote = OneNote(self.cookies)
        self.headers = get_headers()
        self.params = get_params()

    def _fetch_page(self, user_id, cursor):
        """Request one page of ``user_id``'s notes and return its ``data`` payload."""
        api = f"/api/sns/web/v1/user_posted?num=30&cursor={cursor}&user_id={user_id}&image_scenes="
        ret = js.call('get_xs', api, '', self.cookies['a1'])
        self.headers['x-s'], self.headers['x-t'] = ret['X-s'], str(ret['X-t'])
        self.params['user_id'] = user_id
        self.params['cursor'] = cursor
        response = requests.get(
            self.more_url,
            headers=self.headers,
            cookies=self.cookies,
            params=self.params,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.json()["data"]

    def _iter_notes(self, user_id):
        """Yield every note dict of ``user_id``, following the API cursor page by page."""
        cursor = ''
        while True:
            data = self._fetch_page(user_id, cursor)
            notes = data.get("notes") or []
            # An empty list must be checked before looking at notes[0];
            # indexing an empty page would raise IndexError instead of
            # reporting that there is nothing to fetch.
            if not notes or not notes[0]:
                print(f"用户{user_id}没有笔记")
                return
            # Read the paging fields before handing out notes so a malformed
            # page fails before any of its notes are processed.
            cursor, has_more = data["cursor"], data["has_more"]
            yield from notes
            if not has_more:
                return

    # Home page: collect note ids only.
    def get_all_note_info(self, url):
        """Return the ids of all notes on a user's home page.

        Args:
            url: Profile page URL of the user.

        Returns:
            A ``(note_id_list, profile)`` tuple, where ``profile`` is the
            object returned by ``self.profile.get_profile_info(url)``.
        """
        profile = self.profile.get_profile_info(url)
        note_id_list = [note['note_id'] for note in self._iter_notes(profile.userId)]
        return note_id_list, profile

    # Home page: save every note.
    def save_all_note_info(self, url, need_cover=False):
        """Save the profile and then every note listed on the user's home page.

        Args:
            url: Profile page URL of the user.
            need_cover: Passed through unchanged to
                ``self.oneNote.save_one_note_info`` for each note.
        """
        profile = self.profile.save_profile_info(url)
        for index, note in enumerate(self._iter_notes(profile.userId), start=1):
            info = f'第{index}个笔记, '
            self.oneNote.save_one_note_info(self.oneNote.detail_url + note['note_id'], need_cover, info)
        print(f'用户 {profile.nickname} 全部视频信息保存成功')

    def main(self, url_list):
        """Save all notes for each profile URL, continuing past failures.

        Args:
            url_list: Iterable of profile page URLs, e.g.
                ``['https://www.xiaohongshu.com/user/profile/6185ce66000000001000705b',
                'https://www.xiaohongshu.com/user/profile/6034d6f20000000001006fbb']``.
        """
        for url in url_list:
            try:
                self.save_all_note_info(url)
            except Exception as exc:
                # Best effort per user: report the cause and move on.
                # KeyboardInterrupt / SystemExit are deliberately not caught
                # so the crawl can still be stopped.
                print(f'用户 {url} 查询失败: {exc!r}')
if __name__ == '__main__':
    # Script entry point: save every note of the listed profile page(s)
    # using a Home built with its default cookies.
    Home().main([
        "https://www.xiaohongshu.com/user/profile/5b31d099e8ac2b122cd65999",
    ])
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/carloswukong/Spider_XHS.git
git@gitee.com:carloswukong/Spider_XHS.git
carloswukong
Spider_XHS
Spider_XHS
master

搜索帮助