5 Star 1 Fork 0

王泽华/高级软件工程

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
crawler.py 3.89 KB
一键复制 编辑 原始数据 按行查看 历史
王泽华 提交于 2023-12-12 02:54 . update crawler.py.
import codecs
import os
import re
import urllib.parse

import requests
from bs4 import BeautifulSoup
from loguru import logger
# URL templates for igxe.cn.  The ``:d`` fields must be formatted with ints
# and the ``:s`` fields with strings.
igxe_json_fmt = r"https://www.igxe.cn/product/trade/{appid:d}/{igxe_id:d}"
igxe_search_page_fmt = r"https://www.igxe.cn/market/{game:s}?keyword={name:s}"

# Browser-like User-Agent shared by both header sets below.
_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/605.1.15 (KHTML, like Gecko) "
    "Version/16.6 Safari/605.1.15"
)

# NOTE(review): this is a session cookie committed to the repository.  It is
# kept only as a fallback so existing runs behave as before; it should be
# rotated and removed, and the value supplied via the BUFF_COOKIE environment
# variable instead.
_DEFAULT_COOKIE = "csrf_token=IjE0YWVlYWI0Y2JhZDFiMGNmZGY3ZmJkZjZjNTJlZWRkMGEyNDhiYTMi.GFlb1g.8CGHYCR6O1gMvQIWr3bLL4N6KL0; session=1-Pt9Bdn4IG1Yi-p_kNY_I4sh8fbmxIiPpzVx6K7ivtAhm2042156616; NTES_YD_SESS=5ucdjX3n7JAMUXs23QcKQ95TOUNfHSD12mzM2HOV2SAqLRgoLmWedXm2C7MqSX8W9dxe8vTZhP5HIJvyCqpwVF6iq3wFB7C0Cv4HZGjty4gp1nITWYAOctKA3efhSKLYUIuHI6xdvqc5z0exKh_PBcPlxM5hTDEpv1Mrtsi56gohmd2HQN9ZNTuNGo_V2bhzs.Z4CUyt8ApYbhaKOiL8GdAfoL.Ua.x6CxvbXWS5FpO6a; P_INFO=18292166376|1702349370|1|netease_buff|00&99|null&null&null#bej&null#10#0|&0||18292166376; S_INFO=1702349370|0|0&60##|18292166376; remember_me=U1098270992|tzzQQwCrGf4pGYXR7ipObGP9AIhJ1AoQ; Locale-Supported=zh-Hans; game=csgo; Device-Id=amtunX6EXm1MThoJJ2vU"

# Request headers that carry the session cookie.  A non-empty BUFF_COOKIE
# environment variable overrides the committed fallback value.
headers = {
    "User-Agent": _USER_AGENT,
    "Cookie": os.environ.get("BUFF_COOKIE") or _DEFAULT_COOKIE,
    "X-Requested-With": "XMLHttpRequest",
}

# The same request headers without any cookie.
headerss = {
    "User-Agent": _USER_AGENT,
    "X-Requested-With": "XMLHttpRequest",
}
def get_igxe_id(game: str, name: str):
    """Look up the igxe.cn product id of the item named ``name``.

    Fetches the igxe market search page for ``name`` and scans the links in
    its result list for those whose ``name`` element text equals ``name``
    exactly.

    Args:
        game: Game segment placed in the search URL path.
        name: Exact item name to search for; it is URL-quoted before use.

    Returns:
        The integer id taken from the matching link's ``/product/<n>/<id>``
        href, or ``0`` when zero or several links match or the href cannot
        be parsed.

    Raises:
        AssertionError: If the search page does not answer with HTTP 200, or
            if it does not contain exactly one ``list list`` element.
    """
    r = requests.get(
        igxe_search_page_fmt.format(game=game, name=urllib.parse.quote(name)),
        # Use the cookie-free header set: the cookie in ``headers`` should not
        # be sent to igxe.cn, and the other igxe request in this file already
        # uses ``headerss``.
        headers=headerss,
        timeout=30,
    )
    # Explicit raises (rather than ``assert``) so the checks survive
    # ``python -O``; AssertionError is kept for backward compatibility.
    if r.status_code != 200:
        raise AssertionError(
            "Failed to get igxe id of " + name + " with code: " + str(r.status_code)
        )

    # parse html
    soup = BeautifulSoup(r.text, "html.parser")
    data_list = soup.find_all(class_="list list")
    if len(data_list) != 1:
        raise AssertionError("unmatched data list")

    candidates = []
    for a in data_list[0].find_all("a"):
        # Links without a ``name`` child are skipped instead of crashing.
        name_tag = a.find(class_="name")
        if name_tag is not None and name_tag.text == name:
            candidates.append(a)

    if len(candidates) != 1:
        # 0 or >1 candidates; stop
        logger.warning("unmatched candidates: " + str(candidates))
        return 0

    # the correct one
    href = candidates[0].attrs.get("href", "")
    match = re.search(r"/product/\d+/(\d+)", href)
    if match is None:
        logger.warning("unparseable igxe href: " + str(href))
        return 0
    # ``int`` instead of ``eval``: no code execution, and ids with leading
    # zeros no longer raise SyntaxError.
    return int(match.group(1))
# Pattern for the first ``reference_price`` value in the igxe trade response.
_REFERENCE_PRICE_RE = re.compile(r'"reference_price": "(\d+\.\d+)"')


def _parse_number(value, default=0):
    """Parse a numeric API field into ``int`` or ``float`` without ``eval``.

    Plain integer text yields an ``int`` and decimal text a ``float``, which
    matches what ``eval`` produced for ordinary numeric strings.  Anything
    unparseable (including ``None``) yields ``default``.
    """
    text = str(value).strip()
    for cast in (int, float):
        try:
            return cast(text)
        except ValueError:
            continue
    return default


def _fetch_igxe_reference_price(igxe_id):
    """Return the first igxe ``reference_price`` string, or ``None``."""
    if not igxe_id:
        # ``get_igxe_id`` returns 0 when it found no unique match; there is
        # no product to query then, so skip the request.
        return None
    igxeresponse = requests.get(
        igxe_json_fmt.format(appid=730, igxe_id=igxe_id),
        headers=headerss,
        timeout=30,
    )
    match = _REFERENCE_PRICE_RE.search(igxeresponse.text)
    return match.group(1) if match else None


def _report_item(item):
    """Print the buff, igxe and steam price details of one goods entry."""
    good_name = item.get("name", "0")
    print(good_name)
    igxe_id = get_igxe_id(game="csgo", name=good_name)

    first_reference_price = _fetch_igxe_reference_price(igxe_id)
    if first_reference_price is not None:
        print("igxe价格:", first_reference_price)
    else:
        print("没有找到igxe参考价格")

    # These fields come from a remote JSON response and must never be passed
    # to ``eval``; they are parsed as plain numbers instead.
    quick_price = _parse_number(item.get("quick_price", "0"))
    print("快速价格:", quick_price)
    sell_min_price = _parse_number(item.get("sell_min_price", "0"))
    print("最低卖价:", sell_min_price)
    buy_max_price = _parse_number(item.get("buy_max_price", "0"))
    print("求购价格:", buy_max_price)
    sell_num = item.get("sell_num", "0")
    print("销售量:", sell_num)
    steam_market_url = item.get("steam_market_url", "0")
    print("steam市场链接:", steam_market_url)
    print("igxe id:", igxe_id)
    # ``or {}`` also covers an explicit null ``goods_info``.
    nested_item = item.get("goods_info") or {}
    steam_price_cny = _parse_number(nested_item.get("steam_price_cny", "0"))
    print("steam价格:", steam_price_cny)


for page in range(1, 2):
    response = requests.get(
        f"https://buff.163.com/api/market/goods?game=csgo&page_num={page}&min_price=10&max_price=500&sort_by=sell_num.desc",
        headers=headers,
        timeout=30,
    )
    # Decode the response body once and reuse it.
    payload = response.json()
    # Explicit raise (rather than ``assert``) so the check survives
    # ``python -O``; AssertionError is kept for backward compatibility.
    if payload.get("code") != "OK":
        raise AssertionError(str(payload))
    for item in payload["data"]["items"]:
        _report_item(item)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/rexwangzehua/advanced-software-engineering.git
git@gitee.com:rexwangzehua/advanced-software-engineering.git
rexwangzehua
advanced-software-engineering
高级软件工程
master

搜索帮助