1 Star 0 Fork 1

cthousand/爬虫项目6 JS逆向之url参数模拟

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
js逆向之加密参数破解.py 2.94 KB
一键复制 编辑 原始数据 按行查看 历史
cthousand 提交于 2022-05-02 04:41 . update js逆向.py.
import requests
from playwright.sync_api import sync_playwright
from loguru import logger
import pymysql
class Spider:
    """Crawl movie records from spa6.scrape.center and store them in MySQL.

    The site's API requires a ``token`` query parameter and an encoded movie
    id. Both values are obtained by calling ``window.encrypt`` and
    ``window.encrypt_id`` inside a headless Chromium page driven by
    Playwright; the actual HTTP requests are then made with ``requests``.
    """

    def __init__(self):
        """Launch the browser, install script overrides and open the DB."""
        self.BASEURL = 'https://spa6.scrape.center'
        self.MAX_PAGE = 10
        self.LIMIT = 10
        # Keep the Playwright handle so close() can stop the driver later.
        self._playwright = sync_playwright().start()
        self.browser = self._playwright.chromium.launch()
        self.page = self.browser.new_page()
        # Answer requests for two of the site's script chunks with local
        # files. Presumably the local copies expose window.encrypt and
        # window.encrypt_id, which get_token()/get_id() call -- verify
        # against the files themselves.
        self.page.route(
            'https://spa6.scrape.center/js/chunk-19c920f8.c3a1129d.js',
            lambda route: route.fulfill(path='./项目6/chunk.js'))
        self.page.route(
            'https://spa6.scrape.center/js/chunk-4dec7ef0.e4c2b130.js',
            lambda route: route.fulfill(path='./项目6/chunk-id.js'))
        self.page.goto(self.BASEURL)
        self.INDEX_URL = self.BASEURL + \
            '/api/movie?limit={limit}&offset={offset}&token={token}'
        self.DETAIL_URL = self.BASEURL + '/api/movie/{id}/?token={token}'
        # NOTE(review): database credentials are hard-coded in source;
        # consider loading them from configuration or the environment.
        self.db = pymysql.connect(
            host='cq13292957303.mysql.rds.aliyuncs.com', user='qianqian',
            password='Chenqian1234', database='test1')
        self.cursor = self.db.cursor()

    def close(self):
        """Release the DB connection, the browser and the Playwright driver.

        Each step runs even if the previous one raised.
        """
        try:
            self.db.close()
        finally:
            try:
                self.browser.close()
            finally:
                self._playwright.stop()

    def get_token(self, params):
        """Return ``window.encrypt(params)`` evaluated in the browser page.

        The value is handed to the page as an argument rather than spliced
        into the script text, so quotes or backslashes cannot break the
        expression.
        """
        return self.page.evaluate(
            '(value) => window.encrypt(value)', str(params))

    def download(self, url, timeout=10):
        """GET ``url`` and return the ``requests`` response.

        Args:
            url: Absolute URL to fetch.
            timeout: Seconds to wait before giving up on the request.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        res = requests.get(url=url, timeout=timeout)
        logger.info(res.url)
        # Fail here with the HTTP status instead of later on a bad payload.
        res.raise_for_status()
        return res

    def parse_getId(self, json):
        """Yield the ``id`` of every entry in ``json['results']``."""
        for item in json['results']:
            yield item['id']

    def get_id(self, params):
        """Return ``window.encrypt_id(params)`` evaluated in the browser page.

        ``params`` is converted to a string first, matching the string
        argument the page function has always been called with.
        """
        return self.page.evaluate(
            '(value) => window.encrypt_id(value)', str(params))

    def parse_getData(self, json):
        """Return ``[name, score, drama]`` taken from a movie detail dict."""
        data = [json['name'], json['score'], json['drama']]
        logger.info(data)
        return data

    def save(self, data):
        """Insert one movie row, or update it when its key already exists.

        Args:
            data: ``[title, rating_num, brif]`` in that order.
        """
        sql = ('insert into movies(title,rating_num,brif) values(%s,%s,%s) '
               'on duplicate key update title=%s,rating_num=%s,brif=%s')
        # The statement has six placeholders: the three values for the
        # insert followed by the same three for the update clause.
        self.cursor.execute(sql, data * 2)
        self.db.commit()

    def _index_url(self, page_index, token):
        """Build the list-API URL for the zero-based ``page_index``."""
        return self.INDEX_URL.format(
            limit=self.LIMIT,
            # Derive the offset from LIMIT so both always agree.
            offset=page_index * self.LIMIT,
            token=token)

    def main(self):
        """Crawl every index page, fetch each movie's detail and save it.

        Resources are released when the crawl ends, whether it completes or
        fails with an exception.
        """
        try:
            for page_index in range(self.MAX_PAGE):
                index_url = self._index_url(
                    page_index, self.get_token('/api/movie'))
                index_res = self.download(url=index_url)
                for movie_id in self.parse_getId(index_res.json()):
                    # Encode the id once and use the same value for both
                    # the URL path and the path the token is computed from.
                    encoded_id = self.get_id(movie_id)
                    detail_url = self.DETAIL_URL.format(
                        id=encoded_id,
                        token=self.get_token('/api/movie/' + encoded_id))
                    detail_res = self.download(url=detail_url)
                    self.save(self.parse_getData(detail_res.json()))
        finally:
            self.close()
        logger.info('over!')
if __name__ == "__main__":
    # Script entry point: build the crawler and run the whole crawl.
    Spider().main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/cthousand/item-6.git
git@gitee.com:cthousand/item-6.git
cthousand
item-6
爬虫项目6 JS逆向之url参数模拟
master

搜索帮助