import time
import re
import requests
from bs4 import BeautifulSoup
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}
# Municipality-level and special regions whose homepage link points straight to a
# city forecast page (everything else goes through a province listing page first).
special_city = [
    "北京",
    "天津",
    "上海",
    "重庆",
    "新疆",
    "香港",
    "澳门",
    "台湾",
]
pattern = r'星期([一二三四五六日])\d{1,2}月(\d{1,2})日(.*?)(-?\d{1,2}℃) ~ (-?\d{1,2}℃)'
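# Illustrative example of what `pattern` is meant to match; the sample line below is an
# assumption about the page's wording, not real scraped output:
#   >>> re.match(pattern, "星期三3月27日多云12℃ ~ 25℃").groups()
#   ('三', '27', '多云', '12℃', '25℃')
# Note that the month is matched but not captured; only the day of the month is.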
# Fetch and parse the 15-day forecast for a single city page
# (used for municipalities/special regions and for individual cities).
def get_future_wea(url):
    print(url)
    # Turn the city homepage URL into its 15-day forecast URL,
    # e.g. ".../beijing/" -> ".../beijing15tian/"
    url = url[:-1]
    url = "{}15tian/".format(url)
    rsp = requests.get(url, headers=headers)
    soup = BeautifulSoup(rsp.text, 'html.parser')
    info_list = soup.select_one(".ten-list.weather-icon")
    if info_list is None:
        # Forecast list not found on this page
        print(url)
        return
    for content in info_list.contents:
        # Skip bare text nodes (a NavigableString has no .contents)
        if getattr(content, 'contents', None):
            tag = content.contents[0]
            # print(tag.text)
            match = re.match(pattern, tag.text)
            if match:
                # Extract the matched fields
                day_of_week = match.group(1)
                date = match.group(2)
                weather = match.group(3)
                high_temperature = match.group(4)
                low_temperature = match.group(5)
                # Print the results
                # print("Weekday:", day_of_week)
                # print("Date:", f"3月{date}日")
                # print("Weather:", weather)
                # print("High:", high_temperature)
                # print("Low:", low_temperature)
                # TODO: write the record to the database
            else:
                print("No valid info matched: {}".format(tag))
    # print(info_list)
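# Hypothetical sketch of the "write to the database" step referenced above, using the
# standard-library sqlite3 module. The database file, table name, and column names are
# assumptions for illustration; get_future_wea() does not call this helper yet.
def save_forecast(day_of_week, date, weather, high_temperature, low_temperature,
                  db_path="weather.db"):
    import sqlite3
    conn = sqlite3.connect(db_path)
    # Create the table on first use, then append one forecast row
    conn.execute(
        "CREATE TABLE IF NOT EXISTS forecast ("
        "day_of_week TEXT, date TEXT, weather TEXT, high TEXT, low TEXT)"
    )
    conn.execute(
        "INSERT INTO forecast VALUES (?, ?, ?, ?, ?)",
        (day_of_week, date, weather, high_temperature, low_temperature),
    )
    conn.commit()
    conn.close()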
# Walk a province page and fetch the forecast for every city listed in its table.
def get_info_by_city(city_url):
    city_rsp = requests.get(city_url, headers=headers)
    rsp_soup = BeautifulSoup(city_rsp.text, 'html.parser')
    # Each city sits in a <td class="tb"> cell containing a link to its page
    tbody = rsp_soup.find_all("td", class_='tb')
    for item in tbody:
        a_item = item.select_one('a')
        n_url = a_item.attrs['href']
        n_content = a_item.contents[0]
        print(n_content)
        get_future_wea(n_url)
if __name__ == '__main__':
    # Start from the homepage and walk its province/region list
    rsp = requests.get('https://www.tianqi2345.com/', headers=headers)
    # print(rsp.text)
    rsp_soup = BeautifulSoup(rsp.text, 'html.parser')
    china_list = rsp_soup.select('.chinaList')
    all_city = china_list[0].select('a')
    for item in all_city:
        city_word = item.contents[0]
        city_url = item.attrs['href']
        time.sleep(1)  # throttle requests
        if city_word not in special_city:
            # Province link: enumerate its cities first
            print(city_word)
            get_info_by_city(city_url)
        else:
            # Direct city/region link: fetch its forecast straight away
            get_future_wea(city_url)