# NOTE: web-page banner captured with the file, not part of the script: 代码拉取完成,页面将自动刷新
# encoding: utf-8
# @Time : 2024/4/30 13:15
# @Author : Torres-圣君
# @File : run_spider.py
# @Software : PyCharm
import time
import pandas as pd
import requests
import json
from openpyxl import Workbook
from prettytable import PrettyTable
from city import get_city_data
def extract_first_price(data):
    """Return the four characters after the first 'O' in *data* as an int.

    A message is printed and 0 is returned when *data* has no 'O', when
    fewer than four characters follow it, or when those four characters do
    not parse as an integer.
    """
    marker_pos = data.find('O')
    has_four_after = marker_pos != -1 and len(data) > marker_pos + 4
    if not has_four_after:
        print(f"No valid price found in {data}")
        return 0

    # The four characters immediately following the first 'O'.
    price_str = data[marker_pos + 1:marker_pos + 5]
    try:
        return int(price_str)
    except ValueError:
        print(f"Error converting {price_str} to an integer.")
        return 0
class GetTrains:
    """Query the 12306 left-ticket endpoint for one route and tabulate G trains.

    ``run`` fetches the data, prints it as a PrettyTable and returns the rows;
    ``save_data`` writes such rows to an .xlsx workbook.
    """

    def __init__(self, date, begin_id, end_id, timeout=10):
        """Build the request pieces for a single query.

        Args:
            date: Travel date, sent as ``leftTicketDTO.train_date``.
            begin_id: Departure station code, sent as
                ``leftTicketDTO.from_station``.
            end_id: Arrival station code, sent as ``leftTicketDTO.to_station``.
            timeout: Timeout in seconds handed to ``requests.get`` so a stalled
                connection cannot block the script forever.
        """
        # Target URL of the query.
        self.url = "https://kyfw.12306.cn/otn/leftTicket/query"
        # Cookies sent with the request.
        # NOTE(review): these look like values captured from one browser
        # session (JSESSIONID, route, ...) and will presumably expire - confirm.
        self.cookies = {
            '_uab_collina': '171324859263120074949415',
            'JSESSIONID': '708D9C6917F9858184F462E86DC45BD0',
            '_jc_save_fromStation': '%u82CF%u5DDE%2CSZH',
            '_jc_save_toStation': '%u6C5D%u5DDE%2CROF',
            '_jc_save_fromDate': '2024-04-30',
            '_jc_save_wfdc_flag': 'dc',
            'route': '9036359bb8a8a461c164a04f8f50b252',
            'BIGipServerotn': '1172832522.24610.0000',
            'BIGipServerpassport': '854065418.50215.0000',
            'guidesStatus': 'off',
            'highContrastMode': 'defaltMode',
            'cursorStatus': 'off',
            '_jc_save_toDate': '2024-04-30',
        }
        # Request headers.
        self.headers = {
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6,zh-TW;q=0.5',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'If-Modified-Since': '0',
            'Pragma': 'no-cache',
            'Referer': 'https://www.12306.cn/index/index.html',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0',
            'X-Requested-With': 'XMLHttpRequest',
        }
        # Query-string parameters of the request.
        self.params = {
            "leftTicketDTO.train_date": date,
            "leftTicketDTO.from_station": begin_id,
            "leftTicketDTO.to_station": end_id,
            "purpose_codes": "ADULT"
        }
        self.timeout = timeout
        # Table object used to pretty-print the result.
        self.pt = PrettyTable()

    def run(self):
        """Send the query, print the result table and return the rows.

        Returns:
            list[list]: The header row followed by one row per G train.
            Records for which ``format_data`` returns ``None`` are left out,
            so every entry can be written by ``save_data``.

        Raises:
            requests.RequestException: On a network error, a timeout or an
                HTTP error status.
            KeyError: If the JSON payload has no ``data`` / ``result`` entry.
        """
        response = requests.get(
            self.url,
            headers=self.headers,
            params=self.params,
            cookies=self.cookies,
            timeout=self.timeout,
        )
        # Fail with the HTTP error itself instead of a confusing JSON error.
        response.raise_for_status()
        data_list = response.json()['data']['result']
        # Header row, used both for the printed table and the saved sheet.
        header_list = [
            ['车次', '出发时间', '到达时间', '历时', '二等座票价']
        ]
        self.pt.field_names = header_list[0]
        # Only real rows are kept: appending the None returned for skipped
        # trains would make save_data fail on len(None).
        header_list.extend(self._collect_rows(data_list))
        print(self.pt)
        return header_list

    def _collect_rows(self, data_list):
        """Format each raw record and keep only those that yield a row."""
        rows = []
        for data in data_list:
            trains_msg = self.format_data(data)
            if trains_msg is not None:
                rows.append(trains_msg)
        return rows

    def format_data(self, data):
        """Turn one '|'-separated record into a table row.

        The row follows the header order used in ``run``: field 3 (train
        number), fields 8, 9 and 10 (departure time, arrival time, duration)
        and the price that ``extract_first_price`` reads from field 39.

        Args:
            data: One raw record string from the query result.

        Returns:
            The row as a list (also added to the printed table) when the
            train number starts with 'G'; otherwise ``None``.
        """
        all_data_list = data.split('|')
        train_number = all_data_list[3]
        # Only trains whose number starts with 'G' are processed.
        if not train_number.startswith('G'):
            return None
        price = extract_first_price(all_data_list[39])
        trains_msg = [
            train_number,
            all_data_list[8],
            all_data_list[9],
            all_data_list[10],
            price,
        ]
        # Add the row to the printed table, then hand it back for saving.
        self.pt.add_row(trains_msg)
        return trains_msg

    def save_data(self, trains_data_list, begin, end):
        """Write the rows to ``{begin}_{end}.xlsx`` in the working directory.

        Args:
            trains_data_list: Rows to write, header first. ``None`` entries
                are skipped so lists built by older callers still save.
            begin: Departure name, first part of the file name.
            end: Arrival name, second part of the file name.
        """
        wb = Workbook()
        # NOTE(review): the default sheet created by Workbook() is left in
        # place next to this one - confirm the extra empty sheet is intended.
        sheet = wb.create_sheet("车次信息", -1)
        rows = [row for row in trains_data_list if row is not None]
        # Worksheet rows and columns are 1-based.
        for x, row in enumerate(rows, start=1):
            for y, value in enumerate(row, start=1):
                sheet.cell(x, y).value = value
        wb.save(f"{begin}_{end}.xlsx")
        print("数据保存完成!")
if __name__ == '__main__':
    # Travel date passed to the query.
    date = "2024-08-10"
    # Departure and arrival city names; also used for the output file name.
    begin = "武汉"
    end = "广州"
    # Read the generated JSON file once. The `with` block closes the handle;
    # the former second `json.load(open(...))` re-read the same file and
    # left its handle open.
    # NOTE(review): no encoding is given, so the platform default is used -
    # confirm it matches how city_data.json was written.
    with open('city_data.json', 'r') as f:
        city_list = json.load(f)
    # Look up the code that corresponds to each city name.
    begin_id = city_list[begin]
    end_id = city_list[end]
    gt = GetTrains(date, begin_id, end_id)
    trains_data_list = gt.run()
    # Save the fetched rows to an .xlsx file.
    gt.save_data(trains_data_list, begin, end)
# NOTE: web-page moderation notice captured with the file, not part of the script:
# 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
# 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。