import time
import re
import requests
from bs4 import BeautifulSoup
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}
# Municipality-level and special regions whose homepage link points straight to a
# city forecast page (everything else goes through a province listing page first).
special_city = [
    "北京",
    "天津",
    "上海",
    "重庆",
    "新疆",
    "香港",
    "澳门",
    "台湾",
]
pattern = r'星期([一二三四五六日])\d{1,2}月(\d{1,2})日(.*?)(-?\d{1,2}℃) ~ (-?\d{1,2}℃)'
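# Illustrative example of what `pattern` is meant to match; the sample line below is an
# assumption about the page's wording, not real scraped output:
#   >>> re.match(pattern, "星期三3月27日多云12℃ ~ 25℃").groups()
#   ('三', '27', '多云', '12℃', '25℃')
# Note that the month is matched but not captured; only the day of the month is.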
# Fetch and parse the 15-day forecast for a single city page
# (used for municipalities/special regions and for individual cities).
def get_future_wea(url):
    print(url)
    # Turn the city homepage URL into its 15-day forecast URL,
    # e.g. ".../beijing/" -> ".../beijing15tian/"
    url = url[:-1]
    url = "{}15tian/".format(url)
    rsp = requests.get(url, headers=headers)
    soup = BeautifulSoup(rsp.text, 'html.parser')
    info_list = soup.select_one(".ten-list.weather-icon")
    if info_list is None:
        # Forecast list not found on this page
        print(url)
        return
    for content in info_list.contents:
        # Skip bare text nodes (a NavigableString has no .contents)
        if getattr(content, 'contents', None):
            tag = content.contents[0]
            # print(tag.text)
            match = re.match(pattern, tag.text)
            if match:
                # Extract the matched fields
                day_of_week = match.group(1)
                date = match.group(2)
                weather = match.group(3)
                high_temperature = match.group(4)
                low_temperature = match.group(5)
                # Print the results
                # print("Weekday:", day_of_week)
                # print("Date:", f"3月{date}日")
                # print("Weather:", weather)
                # print("High:", high_temperature)
                # print("Low:", low_temperature)
                # TODO: write the record to the database
            else:
                print("No valid info matched: {}".format(tag))
    # print(info_list)
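# Hypothetical sketch of the "write to the database" step referenced above, using the
# standard-library sqlite3 module. The database file, table name, and column names are
# assumptions for illustration; get_future_wea() does not call this helper yet.
def save_forecast(day_of_week, date, weather, high_temperature, low_temperature,
                  db_path="weather.db"):
    import sqlite3
    conn = sqlite3.connect(db_path)
    # Create the table on first use, then append one forecast row
    conn.execute(
        "CREATE TABLE IF NOT EXISTS forecast ("
        "day_of_week TEXT, date TEXT, weather TEXT, high TEXT, low TEXT)"
    )
    conn.execute(
        "INSERT INTO forecast VALUES (?, ?, ?, ?, ?)",
        (day_of_week, date, weather, high_temperature, low_temperature),
    )
    conn.commit()
    conn.close()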
# Walk a province page and fetch the forecast for every city listed in its table.
def get_info_by_city(city_url):
    city_rsp = requests.get(city_url, headers=headers)
    rsp_soup = BeautifulSoup(city_rsp.text, 'html.parser')
    # Each city sits in a <td class="tb"> cell containing a link to its page
    tbody = rsp_soup.find_all("td", class_='tb')
    for item in tbody:
        a_item = item.select_one('a')
        n_url = a_item.attrs['href']
        n_content = a_item.contents[0]
        print(n_content)
        get_future_wea(n_url)
if __name__ == '__main__':
    # Start from the homepage and walk its province/region list
    rsp = requests.get('https://www.tianqi2345.com/', headers=headers)
    # print(rsp.text)
    rsp_soup = BeautifulSoup(rsp.text, 'html.parser')
    china_list = rsp_soup.select('.chinaList')
    all_city = china_list[0].select('a')
    for item in all_city:
        city_word = item.contents[0]
        city_url = item.attrs['href']
        time.sleep(1)  # throttle requests
        if city_word not in special_city:
            # Province link: enumerate its cities first
            print(city_word)
            get_info_by_city(city_url)
        else:
            # Direct city/region link: fetch its forecast straight away
            get_future_wea(city_url)