5 Star 1 Fork 0

王泽华/高级软件工程

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
newcrawler.py 5.63 KB
一键复制 编辑 原始数据 按行查看 历史
import codecs
import os
import random
import re
import time
import urllib.parse

import requests
from bs4 import BeautifulSoup
from loguru import logger
# URL templates for igxe: the per-item product trade endpoint and the market
# search page. Both are filled in with ``str.format`` keyword arguments.
igxe_json_fmt = r"https://www.igxe.cn/product/trade/{appid:d}/{igxe_id:d}"
igxe_search_page_fmt = r"https://www.igxe.cn/market/{game:s}?keyword={name:s}"

# SECURITY: this is a logged-in session cookie committed in plain text. It is
# kept only as a fallback so existing runs keep working; rotate the session and
# supply the value through the BUFF_COOKIE environment variable instead.
_DEFAULT_COOKIE = "csrf_token=IjIyMWViMzZjNTMwY2E3MmU0ZjdkOWEzZTUyMmIwOWI0YTBhYzNjZTAi.GI4h0A.GBB7195yBqeJE6NCZh7mm3EgqGU; session=1-zoYIpB99oRThC7qJS5RNzk8uKrOu_S_m2_Fu74RvtyTA2042156616; remember_me=U1098270992|zbFehPUg6HFsTsGFkkcIKvhp7WyczZvk; NTES_YD_SESS=ELSNaJPhxY4ZFjXdMwrEcp.JxB6QHkW83rJV0EZpaA9CRcNfRVhy3uVwILmCnuFhd3ayF4P2z_EgBY4bICvsQj8ACisjlLIUI49g2T0Mb9Nv_ujhEGnKfILUXt8soFbYWbNf9KU.tvufNxjw6HyUITwNW.4XJWMkdYf9Ljhsb_RfDChVZadVYeKzxz_Zw28R7shJP2az4y52IMHKLbfC96G4qkI_ePKFvPb1l_Rs3lTq7; P_INFO=18292166376|1705807897|1|netease_buff|00&99|null&null&null#sxi&610400#10#0|&0||18292166376; S_INFO=1705807897|0|0&60##|18292166376; Locale-Supported=zh-Hans; game=csgo; Device-Id=amtunX6EXm1MThoJJ2vU"

# Headers carrying the session cookie; used for the buff API request and the
# igxe search page request.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
    "Cookie": os.environ.get("BUFF_COOKIE", _DEFAULT_COOKIE),
    "X-Requested-With": "XMLHttpRequest",
}

# Same headers without the cookie; used for the igxe product trade request.
# Derived from ``headers`` so the two cannot drift apart.
headerss = {key: value for key, value in headers.items() if key != "Cookie"}
def get_igxe_id(game: str, name: str):
    """Look up the igxe product id of the listing whose name equals *name*.

    Fetches the igxe market search page for *game* with *name* as the keyword,
    then scans the anchors inside the single ``class="list list"`` container
    for entries whose ``name`` element text is exactly *name*.

    Args:
        game: Game slug inserted into the search URL path (e.g. ``"csgo"``).
        name: Exact item name; URL-quoted before being used as the keyword.

    Returns:
        The product id as an ``int`` when exactly one listing matches and its
        link has the form ``/product/<digits>/<id>``; otherwise ``0``.

    Raises:
        AssertionError: If the response status is not 200, or the page does
            not contain exactly one ``class="list list"`` container.
    """
    r = requests.get(
        igxe_search_page_fmt.format(game=game, name=urllib.parse.quote(name)),
        headers=headers,
        timeout=30,
    )
    # Raised explicitly rather than via ``assert`` so the checks are not
    # stripped under ``python -O``; the exception type is unchanged.
    if r.status_code != 200:
        raise AssertionError(
            "Falied to get igxe id of " + name + " with code: " + str(r.status_code)
        )

    soup = BeautifulSoup(r.text, "html.parser")
    data_list = soup.find_all(class_="list list")
    if len(data_list) != 1:
        raise AssertionError("unmatched data list")

    candidates = []
    for a in data_list[0].find_all("a"):
        label = a.find(class_="name")
        # Anchors without a name element cannot match; skip them instead of
        # failing on ``None.text``.
        if label is not None and label.text == name:
            candidates.append(a)

    if len(candidates) != 1:
        # 0 or >1 candidates: the match is ambiguous, give up.
        logger.warning("unmatched candidates: " + str(candidates))
        return 0

    href = candidates[0].attrs.get("href", "")
    match = re.search(r"/product/\d+/(\d+)", href)
    if match is None:
        logger.warning("unmatched product href: " + str(href))
        return 0
    # The capture group is digits only, so ``int`` is sufficient; ``eval`` on
    # scraped page content is unnecessary and unsafe.
    return int(match.group(1))
def _to_number(raw):
    """Convert a numeric value from the API into an int or float without ``eval``."""
    if isinstance(raw, (int, float)):
        return raw
    text = str(raw).strip()
    try:
        # Whole numbers stay ints, matching what evaluating the literal gave.
        return int(text)
    except ValueError:
        return float(text)


def _fetch_igxe_price(igxe_id):
    """Return the first ``reference_price`` igxe reports for *igxe_id*, or ``"--"``."""
    if not igxe_id:
        # 0 is the "not found" result of get_igxe_id; there is nothing to query.
        return "--"
    igxeresponse = requests.get(
        igxe_json_fmt.format(appid=730, igxe_id=igxe_id),
        headers=headerss,
        timeout=30,
    )
    match = re.search(r'"reference_price": "(\d+\.\d+)"', igxeresponse.text)
    return match.group(1) if match else "--"


def crawler():
    """Collect buff market listings and pair each with its igxe reference price.

    Requests page 1 of the buff csgo goods API (price 10-500, sorted by sell
    count descending), and for every returned item looks up the igxe product id
    by name and fetches the igxe reference price.

    Returns:
        A list with one dict per item, holding the keys ``name``,
        ``igxe_price`` (a string, ``"--"`` when unavailable), ``quick_price``,
        ``sell_min_price``, ``buy_max_price``, ``sell_num``,
        ``steam_market_url``, ``igxe_id``, ``steam_price_cny``, ``buff_id``,
        ``buff_market_url`` and ``igxe_market_url``.

    Raises:
        AssertionError: If the buff API response ``code`` is not ``"OK"``, or
            if ``get_igxe_id`` fails its own page checks.
        ValueError: If a price field is not a numeric string.
    """
    datas = []
    for page in range(1, 2):
        # Short random pause (0.01-0.1 s) before each page request.
        time.sleep(random.uniform(0.01, 0.1))
        response = requests.get(
            f"https://buff.163.com/api/market/goods?game=csgo&page_num={page}&min_price=10&max_price=500&sort_by=sell_num.desc",
            headers=headers,
            timeout=30,
        )
        # Decode the body once and reuse it.
        payload = response.json()
        # Raised explicitly so the check is not stripped under ``python -O``.
        if payload["code"] != "OK":
            raise AssertionError(str(payload))

        for item in payload["data"]["items"]:
            good_name = item.get("name", "0")
            igxe_id = get_igxe_id(game="csgo", name=good_name)
            goods_info = item.get("goods_info", {})
            buff_id = item.get("id", "0")
            # The lowest-discount figure is intentionally not computed here; it
            # can be derived from sell_min_price, igxe_price and
            # steam_price_cny by the consumer.
            datas.append(
                {
                    "name": good_name,
                    "igxe_price": _fetch_igxe_price(igxe_id),
                    "quick_price": _to_number(item.get("quick_price", "0")),
                    "sell_min_price": _to_number(item.get("sell_min_price", "0")),
                    "buy_max_price": _to_number(item.get("buy_max_price", "0")),
                    "sell_num": item.get("sell_num", "0"),
                    "steam_market_url": item.get("steam_market_url", "0"),
                    "igxe_id": igxe_id,
                    "steam_price_cny": _to_number(
                        goods_info.get("steam_price_cny", "0")
                    ),
                    "buff_id": buff_id,
                    "buff_market_url": f"https://buff.163.com/goods/{buff_id}?from=market#tab=selling",
                    "igxe_market_url": f"https://www.igxe.cn/product/730/{igxe_id}",
                }
            )
    return datas
# file.close()
#print(decoded_response)
# 解析 HTML 代码
#print(soup)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/rexwangzehua/advanced-software-engineering.git
git@gitee.com:rexwangzehua/advanced-software-engineering.git
rexwangzehua
advanced-software-engineering
高级软件工程
master

搜索帮助