1 Star 0 Fork 0

线下阿狗/PY_wea_爬虫

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
main.py 2.60 KB
一键复制 编辑 原始数据 按行查看 历史
lemon 提交于 2024-03-11 00:37 . 'feat:feat'
import time
import re
import requests
from bs4 import BeautifulSoup
# HTTP headers sent with every request made by this script.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36"
    ),
}

# Region names that the main script hands straight to get_future_wea();
# every other name is first expanded through get_info_by_city().
special_city = [
    "北京",
    "天津",
    "上海",
    "重庆",
    "新疆",
    "香港",
    "澳门",
    "台湾",
]

# Regex for one forecast entry. The adjacent raw strings are concatenated
# into a single pattern with five capture groups.
pattern = (
    r'星期([一二三四五六日])'          # 1: weekday character
    r'\d{1,2}月(\d{1,2})日'           # month (not captured), 2: day of month
    r'(.*?)'                          # 3: weather description (lazy)
    r'(-?\d{1,2}℃) ~ (-?\d{1,2}℃)'    # 4 and 5: the two temperatures
)
# Used for municipalities and individual cities.
def get_future_wea(url, timeout=10):
    """Fetch a city's 15-day forecast page and parse each daily entry.

    The forecast URL is built by dropping the last character of ``url`` and
    appending ``15tian/``. Every child element of the
    ``.ten-list.weather-icon`` node is matched against the module-level
    ``pattern``; entries that do not match are reported on stdout.

    Args:
        url: City page URL. NOTE(review): presumably ends with ``/`` so that
            dropping the last character removes it -- confirm against the
            site's links.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        None. Parsed values are not persisted yet (see the TODO below).
    """
    print(url)
    url = "{}15tian/".format(url[:-1])
    rsp = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(rsp.text, 'html.parser')
    info_list = soup.select_one(".ten-list.weather-icon")
    if info_list is None:
        # No forecast list on this page; log the URL that failed and give up.
        print(url)
        return

    for content in info_list.contents:
        # Bare text nodes (e.g. whitespace between tags) have no ``contents``
        # attribute, so skip them together with empty elements.
        children = getattr(content, 'contents', None)
        if not children:
            continue

        tag = children[0]
        match = re.match(pattern, tag.text)
        if not match:
            print("未匹配到有效信息:{}".format(tag))
            continue

        # Extract the matched fields.
        # NOTE(review): group 4 / group 5 are named high / low as in the
        # original code; confirm the order against the page text.
        (day_of_week, date, weather,
         high_temperature, low_temperature) = match.group(1, 2, 3, 4, 5)
        # TODO: write the parsed record to the database.
# Province level.
def get_info_by_city(city_url, timeout=10):
    """Fetch a province page and process the forecast of every linked city.

    Each ``<td class="tb">`` cell is expected to hold an ``<a>`` element. The
    link's first child is printed and its ``href`` is passed to
    ``get_future_wea``. Cells without a usable link are skipped so that one
    malformed cell does not abort the whole crawl.

    Args:
        city_url: URL of the province page listing its cities.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        None.
    """
    city_rsp = requests.get(city_url, headers=headers, timeout=timeout)
    rsp_soup = BeautifulSoup(city_rsp.text, 'html.parser')
    for cell in rsp_soup.find_all("td", class_='tb'):
        a_item = cell.select_one('a')
        # select_one returns None when the cell has no anchor; an anchor
        # without an href gives nothing to fetch either.
        if a_item is None or not a_item.get('href'):
            continue
        if a_item.contents:
            print(a_item.contents[0])
        get_future_wea(a_item['href'])
if __name__ == '__main__':
    # Script entry point: read the region links from the home page and
    # process each region in turn.
    home_rsp = requests.get('https://www.tianqi2345.com/', headers=headers)
    home_soup = BeautifulSoup(home_rsp.text, 'html.parser')
    # Links are taken from the first ``.chinaList`` element only.
    region_links = home_soup.select('.chinaList')[0].select('a')
    for link in region_links:
        region_name = link.contents[0]
        region_url = link.attrs['href']
        # Pause one second before handling each region (presumably to
        # throttle requests to the site).
        time.sleep(1)
        if region_name in special_city:
            # Listed regions go straight to the forecast fetch.
            get_future_wea(region_url)
            continue
        # Any other region is expanded into its cities first.
        print(region_name)
        get_info_by_city(region_url)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/dogbrother/wea_scr.git
git@gitee.com:dogbrother/wea_scr.git
dogbrother
wea_scr
PY_wea_爬虫
master

搜索帮助