1 Star 0 Fork 0

xyislove/python-test

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
网易云音乐评论抓取.py 4.85 KB
一键复制 编辑 原始数据 按行查看 历史
xyislove 提交于 2022-01-28 20:54 . 22.1.28
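# NetEase Cloud Music comment scraper, including a Python port of the site's reverse-engineered JS request encryption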
# 'sortTypeList': [{'sortType': 1,
# 'sortTypeName': '按推荐排序',
# 'target': 'order_by_alg'},
# {'sortType': 2,
# 'sortTypeName': '按热度排序',
# 'target': 'order_by_hot'},
# {'sortType': 3,
# 'sortTypeName': '按时间排序',
# 'target': 'order_by_time'}],
#
from email import header
from re import M
import bs4
import requests
import random
import os
from fake_useragent import UserAgent
import json
import pprint
# Install the AES implementation with: pip install pycryptodome
from Crypto.Cipher import AES
# Base64 *encoder* (not decoder): used to turn the AES ciphertext bytes into text
from base64 import b64encode
# js解密 逆向的 python模拟
url = 'https://music.163.com/weapi/comment/resource/comments/get?csrf_token=' # 评论的请求 其中
# "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
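# The `csrf_token=` query parameter indicates whether the user is logged in; the request method is POST.
# 1. Find the unencrypted request parameters.
# 2. Encrypt them the way NetEase's JS does, producing `params` and `encSecKey`.
# 3. Send the request and read the comment data from the response.
# Tip: inspect the request's "Initiator" column in the browser dev tools,
# then set a breakpoint on the line that sends the request to capture the original (plaintext) parameters.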
# 原始参数
# cursor: -1
# offset: 0
# orderType: 1
# pageNo: 1
# pageSize: 20
# rid: "R_SO_4_66282"
# threadId: "R_SO_4_66282"
# var bVj7c = window.asrsea(JSON.stringify(i7b), bsR1x(["流泪", "强"]), bsR1x(Xp4t.md), bsR1x(["爱心", "女孩", "惊恐", "大笑"]));
# window.asrsea(arg1 (the original parameters as a JSON string), arg2, arg3, arg4)
# The JS encryption routine, quoted from the site:
# """
# !function() {
# function a(a) {
# var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
# for (d = 0; a > d; d += 1) #循环16次
# e = Math.random() * b.length, #随机数
# e = Math.floor(e),#取整 1
# c += b.charAt(e);#取字符串的某位置
# return c
# }
# function b(a, b) {
# var c = CryptoJS.enc.Utf8.parse(b)
# , d = CryptoJS.enc.Utf8.parse("0102030405060708")
# , e = CryptoJS.enc.Utf8.parse(a)
# , f = CryptoJS.AES.encrypt(e, c, {
# iv: d,
# mode: CryptoJS.mode.CBC
# });
# return f.toString()
# }
# function c(a, b, c) {
# var d, e;
# return setMaxDigits(131),
# d = new RSAKeyPair(b,"",c),
# e = encryptedString(d, a)
# }
# function d(d, e, f, g) { data ,010001,定值,
# var h = {}
# , i = a(16);
# h.encText = b(d, g),
# h.encText = b(h.encText, i), #返回的就是parmas
# h.encSecKey = c(i, e, f), #得到 encSecKey 其中参数e,f是定值
# return h
# }
# function e(a, b, d, e) {
# var f = {};
# return f.encText = c(a + e, b, d),
# f
# }
# window.asrsea = d,
# window.ecnonasr = e
# }();
# """
# Constants mirroring the arguments of the JS `d(d, e, f, g)` routine quoted above.
# `e` and `f` are the two fixed values the JS passes to `RSAKeyPair(e, "", f)`.
e = "010001"
f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
# AES key for the first encryption pass (`b(d, g)` in the JS).
g = "0CoJUm6Qyw8W8jud"
# AES key for the second pass. The JS generates 16 random characters here
# (`a(16)`); this script pins one fixed value instead.
i = "IE06IfCaHglEITpd"
# Hard-coded `encSecKey` form field. In the JS it is `c(i, e, f)`;
# NOTE(review): presumably the value captured for the fixed `i` above - confirm.
encSecKey = "83ff3e4570dc881c7c5a3c2c33aa7506872ee86bf9e19005e3f881cb0dca0df1921c9121b58e82a06b88e8a80cfa521a3b959fb53b7f8ec2bfbb0f25918f6daf771ac41196702baeb24ff181fdbb36ad901b0d17ea3d6ee78aee7e9858f3ce33273cf2f0b69302aeef55caaa15a8ec5aa6530a0be08f6458b59e4f7dbaba0376"
# Plaintext request parameters (the "original parameters" listed above).
data = {
    "csrf_token": "",
    "cursor": "-1",
    "offset": "0",
    "orderType": "1",
    "pageNo": "1",
    "pageSize": "20",
    "rid": "R_SO_4_66282",
    "threadId": "R_SO_4_66282",
}
# print('hello,fuck')
# print(f)
# Python re-implementation of the JS encryption routine: begin
def to_16(data):
    """Pad *data* so that its UTF-8 encoding fills whole 16-byte blocks.

    Appends ``pad`` copies of ``chr(pad)``, where ``pad`` is the number of
    bytes (1-16) missing to reach the next multiple of 16. Input that is
    already block-aligned receives a full extra block of padding.

    Args:
        data: The text to pad (``str``).

    Returns:
        A new ``str`` whose UTF-8 encoding has a length divisible by 16.
    """
    block_size = 16
    # Measure the encoded length, not the character count: the caller encrypts
    # the UTF-8 bytes, and multi-byte characters make the two differ.
    pad = block_size - len(data.encode('utf-8')) % block_size
    # chr(1)..chr(16) each encode to a single byte, so adding `pad`
    # characters adds exactly `pad` bytes.
    return data + chr(pad) * pad
def get_params(data, g, i):
    """Build the ``params`` form value by encrypting *data* twice.

    The text is passed through ``parsms_enc`` first with key *g*, and the
    result is passed through it again with key *i*. The final value is
    printed and returned.

    Args:
        data: The text to encrypt.
        g: Key for the first pass.
        i: Key for the second pass.

    Returns:
        The output of the second ``parsms_enc`` call.
    """
    encrypted = data
    # Apply the two passes in order: g first, then i.
    for pass_key in (g, i):
        encrypted = parsms_enc(encrypted, pass_key)
    print(encrypted)
    return encrypted
def parsms_enc(data, key):
    """Encrypt *data* with AES in CBC mode and return it Base64-encoded.

    Args:
        data: The text to encrypt; it is padded with ``to_16`` first.
        key: The secret key as text; it is UTF-8 encoded before use.

    Returns:
        The Base64 text of the ciphertext, as a ``str``.
    """
    # Pad the text before encrypting it.
    padded = to_16(data)
    # Fixed initialisation vector, the same literal the quoted JS uses.
    init_vector = "0102030405060708".encode('utf-8')
    cipher = AES.new(
        key=key.encode('utf-8'),
        IV=init_vector,
        mode=AES.MODE_CBC,
    )
    ciphertext = cipher.encrypt(padded.encode('utf-8'))
    # Base64 output is ASCII, so decoding it as UTF-8 gives the text form.
    return b64encode(ciphertext).decode('utf-8')
# Python re-implementation of the JS encryption routine: end
if __name__ == '__main__':
    # Serialise the request parameters to a JSON string. A separate name is
    # used so the module-level `data` dict is not overwritten.
    payload = json.dumps(data)
    print(payload)

    # Send a randomly chosen browser User-Agent header.
    headers = {'User-Agent': UserAgent().random}
    enc_data = {
        'params': get_params(payload, g, i),
        'encSecKey': encSecKey,
    }

    # The timeout stops a stalled connection from hanging the script, and the
    # context manager closes the response even if decoding the body raises.
    with requests.post(url=url, headers=headers, data=enc_data,
                       timeout=10) as resp:
        # Fail on an HTTP error status instead of trying to parse its body.
        resp.raise_for_status()
        html_srouce = resp.json()
    pprint.pprint(html_srouce)

    # `out_file` rather than `f`, so the module-level constant `f` is not
    # rebound to a file object.
    with open('./评论.txt', mode='w', encoding='utf-8') as out_file:
        json.dump(html_srouce, out_file)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/xyislove/python-test.git
git@gitee.com:xyislove/python-test.git
xyislove
python-test
python-test
master

搜索帮助